Tidy up, tests and docs
This commit is contained in:
parent 96b636c2d3
commit 11347f34da
@@ -1,10 +1,11 @@
+from typing import List, Dict, Union, Iterable, Any, Optional, Callable, Iterator
+from typing import Tuple
 import srsly
-from typing import List, Dict, Union, Iterable, Any, Optional
 from pathlib import Path

 from .pipe import Pipe
 from ..errors import Errors
-from ..training import validate_examples
+from ..training import validate_examples, Example
 from ..language import Language
 from ..matcher import Matcher
 from ..scorer import Scorer
@@ -22,17 +23,9 @@ TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
 MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]


-@Language.factory(
-    "attribute_ruler", default_config={"validate": False}
-)
-def make_attribute_ruler(
-    nlp: Language,
-    name: str,
-    validate: bool,
-):
-    return AttributeRuler(
-        nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
-    )
+@Language.factory("attribute_ruler", default_config={"validate": False})
+def make_attribute_ruler(nlp: Language, name: str, validate: bool):
+    return AttributeRuler(nlp.vocab, name, validate=validate)


 class AttributeRuler(Pipe):
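With this change, `validate` is the only remaining factory setting and patterns are no longer accepted at construction time. A minimal sketch of what adding the component looks like from user code after this commit (illustrative, not part of the diff):

```python
import spacy

nlp = spacy.blank("en")
# "validate" is forwarded to the Matcher the AttributeRuler uses internally
ruler = nlp.add_pipe("attribute_ruler", config={"validate": True})
```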
@@ -43,12 +36,7 @@ class AttributeRuler(Pipe):
     """

     def __init__(
-        self,
-        vocab: Vocab,
-        name: str = "attribute_ruler",
-        *,
-        pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
-        validate: bool = False,
+        self, vocab: Vocab, name: str = "attribute_ruler", *, validate: bool = False
     ) -> None:
         """Create the AttributeRuler. After creation, you can add patterns
         with the `.initialize()` or `.add_patterns()` methods, or load patterns
@@ -71,12 +59,12 @@ class AttributeRuler(Pipe):

     def initialize(
         self,
-        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
+        get_examples: Optional[Callable[[], Iterable[Example]]],
         *,
         nlp: Optional[Language] = None,
         patterns: Optional[Iterable[AttributeRulerPatternType]] = None,
-        tag_map: Optional[TagMapType]=None,
-        morph_rules: Optional[MorphRulesType]=None
+        tag_map: Optional[TagMapType] = None,
+        morph_rules: Optional[MorphRulesType] = None,
     ):
         """Initialize the attribute ruler by adding zero or more patterns.

@@ -126,7 +114,7 @@ class AttributeRuler(Pipe):
             set_token_attrs(span[index], attrs)
         return doc

-    def pipe(self, stream, *, batch_size=128):
+    def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
         """Apply the pipe to a stream of documents. This usually happens under
         the hood when the nlp object is called on a text and all components are
         applied to the Doc.
@@ -210,16 +198,16 @@ class AttributeRuler(Pipe):
         self.attrs.append(attrs)
         self.indices.append(index)

-    def add_patterns(self, pattern_dicts: Iterable[AttributeRulerPatternType]) -> None:
+    def add_patterns(self, patterns: Iterable[AttributeRulerPatternType]) -> None:
         """Add patterns from a list of pattern dicts with the keys as the
         arguments to AttributeRuler.add.
-        pattern_dicts (Iterable[dict]): A list of pattern dicts with the keys
+        patterns (Iterable[dict]): A list of pattern dicts with the keys
         as the arguments to AttributeRuler.add (patterns/attrs/index) to
         add as patterns.

         DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
         """
-        for p in pattern_dicts:
+        for p in patterns:
             self.add(**p)

     @property
@@ -234,7 +222,7 @@ class AttributeRuler(Pipe):
             all_patterns.append(p)
         return all_patterns

-    def score(self, examples, **kwargs):
+    def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
         """Score a batch of examples.

         examples (Iterable[Example]): The examples to score.
@@ -275,7 +263,7 @@ class AttributeRuler(Pipe):

     def from_bytes(
         self, bytes_data: bytes, exclude: Iterable[str] = SimpleFrozenList()
-    ):
+    ) -> "AttributeRuler":
         """Load the AttributeRuler from a bytestring.

         bytes_data (bytes): The data to load.
@@ -293,7 +281,6 @@ class AttributeRuler(Pipe):
             "patterns": load_patterns,
         }
         util.from_bytes(bytes_data, deserialize, exclude)
-
         return self

     def to_disk(
@@ -303,6 +290,7 @@ class AttributeRuler(Pipe):

         path (Union[Path, str]): A path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
+
         DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
         """
         serialize = {
@@ -313,11 +301,13 @@ class AttributeRuler(Pipe):

     def from_disk(
         self, path: Union[Path, str], exclude: Iterable[str] = SimpleFrozenList()
-    ) -> None:
+    ) -> "AttributeRuler":
         """Load the AttributeRuler from disk.

         path (Union[Path, str]): A path to a directory.
         exclude (Iterable[str]): String names of serialization fields to exclude.
+        RETURNS (AttributeRuler): The loaded object.

         DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
         """

@@ -329,11 +319,10 @@ class AttributeRuler(Pipe):
             "patterns": load_patterns,
         }
         util.from_disk(path, deserialize, exclude)
-
         return self


-def _split_morph_attrs(attrs):
+def _split_morph_attrs(attrs: dict) -> Tuple[dict, dict]:
     """Split entries from a tag map or morph rules dict into to two dicts, one
     with the token-level features (POS, LEMMA) and one with the remaining
     features, which are presumed to be individual MORPH features."""
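Taken together, these hunks drop `pattern_dicts` from the constructor and factory and move pattern loading to `initialize()` (or `add_patterns()`). A short sketch of the new flow, reusing the pattern shape from the docs changes below (assumed values, not part of the diff):

```python
import spacy

nlp = spacy.blank("en")
ruler = nlp.add_pipe("attribute_ruler")

# Patterns are now supplied at initialization time rather than at construction time
patterns = [{"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}}]
ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)

doc = nlp("He saw an owl")
print(doc[2].lemma_)  # the rule above rewrites the lemma of "an" to "a"
```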
@@ -63,6 +63,39 @@ def morph_rules():
     return {"DT": {"the": {"POS": "DET", "LEMMA": "a", "Case": "Nom"}}}


+def check_tag_map(ruler):
+    doc = Doc(
+        ruler.vocab,
+        words=["This", "is", "a", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = ruler(doc)
+    for i in range(len(doc)):
+        if i == 4:
+            assert doc[i].pos_ == "PUNCT"
+            assert str(doc[i].morph) == "PunctType=peri"
+        else:
+            assert doc[i].pos_ == ""
+            assert str(doc[i].morph) == ""
+
+
+def check_morph_rules(ruler):
+    doc = Doc(
+        ruler.vocab,
+        words=["This", "is", "the", "test", "."],
+        tags=["DT", "VBZ", "DT", "NN", "."],
+    )
+    doc = ruler(doc)
+    for i in range(len(doc)):
+        if i != 2:
+            assert doc[i].pos_ == ""
+            assert str(doc[i].morph) == ""
+        else:
+            assert doc[2].pos_ == "DET"
+            assert doc[2].lemma_ == "a"
+            assert str(doc[2].morph) == "Case=Nom"
+
+
 def test_attributeruler_init(nlp, pattern_dicts):
     a = nlp.add_pipe("attribute_ruler")
     for p in pattern_dicts:
@@ -78,7 +111,8 @@ def test_attributeruler_init(nlp, pattern_dicts):

 def test_attributeruler_init_patterns(nlp, pattern_dicts):
     # initialize with patterns
-    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], patterns=pattern_dicts)
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
     assert str(doc[2].morph) == "Case=Nom|Number=Plur"
@@ -88,10 +122,11 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
     assert doc.has_annotation("MORPH")
     nlp.remove_pipe("attribute_ruler")
     # initialize with patterns from asset
-    nlp.add_pipe(
-        "attribute_ruler",
-        config={"pattern_dicts": {"@misc": "attribute_ruler_patterns"}},
-    )
+    nlp.config["initialize"]["components"]["attribute_ruler"] = {
+        "patterns": {"@misc": "attribute_ruler_patterns"}
+    }
+    nlp.add_pipe("attribute_ruler")
+    nlp.initialize()
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
     assert str(doc[2].morph) == "Case=Nom|Number=Plur"
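The `{"@misc": "attribute_ruler_patterns"}` reference above is resolved through spaCy's function registry; the registered function itself is defined elsewhere in the test module and is not shown in this diff. A hedged sketch of what such a registration could look like:

```python
import spacy


@spacy.registry.misc("attribute_ruler_patterns")
def make_attribute_ruler_patterns():
    # Pattern dicts in the add_patterns() format: patterns / attrs / optional index
    return [
        {"patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}},
        {"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}},
    ]
```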
@@ -103,18 +138,15 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):

 def test_attributeruler_score(nlp, pattern_dicts):
     # initialize with patterns
-    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], patterns=pattern_dicts)
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
     assert str(doc[2].morph) == "Case=Nom|Number=Plur"
     assert doc[3].lemma_ == "cat"
     assert str(doc[3].morph) == "Case=Nom|Number=Sing"
-
-    dev_examples = [
-        Example.from_dict(
-            nlp.make_doc("This is a test."), {"lemmas": ["this", "is", "a", "cat", "."]}
-        )
-    ]
+    doc = nlp.make_doc("This is a test.")
+    dev_examples = [Example.from_dict(doc, {"lemmas": ["this", "is", "a", "cat", "."]})]
     scores = nlp.evaluate(dev_examples)
     # "cat" is the only correct lemma
     assert scores["lemma_acc"] == pytest.approx(0.2)
@@ -139,40 +171,27 @@ def test_attributeruler_rule_order(nlp):


 def test_attributeruler_tag_map(nlp, tag_map):
-    a = AttributeRuler(nlp.vocab)
-    a.load_from_tag_map(tag_map)
-    doc = Doc(
-        nlp.vocab,
-        words=["This", "is", "a", "test", "."],
-        tags=["DT", "VBZ", "DT", "NN", "."],
-    )
-    doc = a(doc)
-    for i in range(len(doc)):
-        if i == 4:
-            assert doc[i].pos_ == "PUNCT"
-            assert str(doc[i].morph) == "PunctType=peri"
-        else:
-            assert doc[i].pos_ == ""
-            assert str(doc[i].morph) == ""
+    ruler = AttributeRuler(nlp.vocab)
+    ruler.load_from_tag_map(tag_map)
+    check_tag_map(ruler)
+
+
+def test_attributeruler_tag_map_initialize(nlp, tag_map):
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], tag_map=tag_map)
+    check_tag_map(ruler)


 def test_attributeruler_morph_rules(nlp, morph_rules):
-    a = AttributeRuler(nlp.vocab)
-    a.load_from_morph_rules(morph_rules)
-    doc = Doc(
-        nlp.vocab,
-        words=["This", "is", "the", "test", "."],
-        tags=["DT", "VBZ", "DT", "NN", "."],
-    )
-    doc = a(doc)
-    for i in range(len(doc)):
-        if i != 2:
-            assert doc[i].pos_ == ""
-            assert str(doc[i].morph) == ""
-        else:
-            assert doc[2].pos_ == "DET"
-            assert doc[2].lemma_ == "a"
-            assert str(doc[2].morph) == "Case=Nom"
+    ruler = AttributeRuler(nlp.vocab)
+    ruler.load_from_morph_rules(morph_rules)
+    check_morph_rules(ruler)
+
+
+def test_attributeruler_morph_rules_initialize(nlp, morph_rules):
+    ruler = nlp.add_pipe("attribute_ruler")
+    ruler.initialize(lambda: [], morph_rules=morph_rules)
+    check_morph_rules(ruler)


 def test_attributeruler_indices(nlp):
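These tests depend on `nlp`, `pattern_dicts`, `tag_map` and `morph_rules` fixtures defined earlier in the test module; only the `morph_rules` return value is visible in this diff. A rough, hypothetical sketch of two of the other fixtures, inferred from the assertions in `check_tag_map`:

```python
import pytest
import spacy


@pytest.fixture
def nlp():
    return spacy.blank("en")


@pytest.fixture
def tag_map():
    # check_tag_map expects the "." tag to yield POS "PUNCT" and PunctType=peri
    return {".": {"POS": "PUNCT", "PunctType": "peri"}}
```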
@@ -4,6 +4,7 @@ tag: class
 source: spacy/pipeline/attributeruler.py
 new: 3
 teaser: 'Pipeline component for rule-based token attribute assignment'
+api_base_class: /api/pipe
 api_string_name: attribute_ruler
 api_trainable: false
 ---
@@ -25,17 +26,13 @@ how the component should be configured. You can override its settings via the
 > #### Example
 >
 > ```python
-> config = {
->    "pattern_dicts": None,
->    "validate": True,
-> }
+> config = {"validate": True}
 > nlp.add_pipe("attribute_ruler", config=config)
 > ```

 | Setting | Description |
-| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `pattern_dicts` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
+| ---------- | --------------------------------------------------------------------------------------------- |
 | `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |

 ```python
 %%GITHUB_SPACY/spacy/pipeline/attributeruler.py
@@ -43,36 +40,26 @@ how the component should be configured. You can override its settings via the

 ## AttributeRuler.\_\_init\_\_ {#init tag="method"}

-Initialize the attribute ruler. If pattern dicts are supplied here, they need to
-be a list of dictionaries with `"patterns"`, `"attrs"`, and optional `"index"`
-keys, e.g.:
-
-```python
-pattern_dicts = [
-    {"patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}},
-    {"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}},
-]
-```
+Initialize the attribute ruler.

 > #### Example
 >
 > ```python
 > # Construction via add_pipe
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
 > ```

 | Name | Description |
-| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
 | `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
 | `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
 | _keyword-only_ | |
-| `pattern_dicts` | Optional patterns to load in on initialization. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
 | `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |

 ## AttributeRuler.\_\_call\_\_ {#call tag="method"}

-Apply the attribute ruler to a `Doc`, setting token attributes for tokens matched
-by the provided patterns.
+Apply the attribute ruler to a `Doc`, setting token attributes for tokens
+matched by the provided patterns.

 | Name | Description |
 | ----------- | -------------------------------- |
@@ -90,10 +77,10 @@ may be negative to index from the end of the span.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
 > patterns = [[{"TAG": "VB"}]]
 > attrs = {"POS": "VERB"}
-> attribute_ruler.add(patterns=patterns, attrs=attrs)
+> ruler.add(patterns=patterns, attrs=attrs)
 > ```

 | Name | Description |
@@ -107,11 +94,10 @@ may be negative to index from the end of the span.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> pattern_dicts = [
+> ruler = nlp.add_pipe("attribute_ruler")
+> patterns = [
 >     {
->         "patterns": [[{"TAG": "VB"}]],
->         "attrs": {"POS": "VERB"}
+>         "patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}
 >     },
 >     {
 >         "patterns": [[{"LOWER": "two"}, {"LOWER": "apples"}]],
@@ -119,15 +105,16 @@ may be negative to index from the end of the span.
 >         "index": -1
 >     },
 > ]
-> attribute_ruler.add_patterns(pattern_dicts)
+> ruler.add_patterns(patterns)
 > ```

-Add patterns from a list of pattern dicts with the keys as the arguments to
+Add patterns from a list of pattern dicts. Each pattern dict can specify the
+keys `"patterns"`, `"attrs"` and `"index"`, which match the arguments of
 [`AttributeRuler.add`](/api/attributeruler#add).

 | Name | Description |
-| --------------- | -------------------------------------------------------------------------- |
-| `pattern_dicts` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |
+| ---------- | -------------------------------------------------------------------------- |
+| `patterns` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |

 ## AttributeRuler.patterns {#patterns tag="property"}
@@ -139,20 +126,39 @@ Get all patterns that have been added to the attribute ruler in the
 | ----------- | -------------------------------------------------------------------------------------------- |
 | **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |

-## AttributeRuler.score {#score tag="method" new="3"}
+## AttributeRuler.initialize {#initialize tag="method"}

-Score a batch of examples.
+Initialize the component with data. Typically called before training to load in
+rules from a file. This method is typically called by
+[`Language.initialize`](/api/language#initialize) and lets you customize
+arguments it receives via the
+[`[initialize.components]`](/api/data-formats#config-initialize) block in the
+config.

 > #### Example
 >
 > ```python
-> scores = attribute_ruler.score(examples)
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)
+> ```
+>
+> ```ini
+> ### config.cfg
+> [initialize.components.attribute_ruler]
+>
+> [initialize.components.attribute_ruler.patterns]
+> @readers = "srsly.read_json.v1"
+> path = "corpus/attribute_ruler_patterns.json
 > ```

 | Name | Description |
-| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `examples` | The examples to score. ~~Iterable[Example]~~ |
-| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects (the training data). Not used by this component. ~~Callable[[], Iterable[Example]]~~ |
+| _keyword-only_ | |
+| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
+| `patterns` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
+| `tag_map` | The tag map that maps fine-grained tags to coarse-grained tags and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |
+| `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]]~~ |

 ## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}

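The `[initialize.components.attribute_ruler.patterns]` block added above points `srsly.read_json.v1` at a JSON asset of pattern dicts. One way such an asset could be produced (illustrative; the path mirrors the one in the example):

```python
import srsly

patterns = [
    {"patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}},
    {"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}},
]
# Write the asset the config's @readers block will load at initialization
srsly.write_json("corpus/attribute_ruler_patterns.json", patterns)
```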
@@ -170,6 +176,21 @@ Load attribute ruler patterns from morph rules.
 | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |

+## AttributeRuler.score {#score tag="method" new="3"}
+
+Score a batch of examples.
+
+> #### Example
+>
+> ```python
+> scores = ruler.score(examples)
+> ```
+
+| Name | Description |
+| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `examples` | The examples to score. ~~Iterable[Example]~~ |
+| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
+
 ## AttributeRuler.to_disk {#to_disk tag="method"}

 Serialize the pipe to disk.
@@ -177,8 +198,8 @@ Serialize the pipe to disk.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.to_disk("/path/to/attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.to_disk("/path/to/attribute_ruler")
 > ```

 | Name | Description |
@@ -194,8 +215,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.from_disk("/path/to/attribute_ruler")
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.from_disk("/path/to/attribute_ruler")
 > ```

 | Name | Description |
@@ -210,8 +231,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler_bytes = attribute_ruler.to_bytes()
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler = ruler.to_bytes()
 > ```

 Serialize the pipe to a bytestring.
@@ -229,9 +250,9 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > #### Example
 >
 > ```python
-> attribute_ruler_bytes = attribute_ruler.to_bytes()
-> attribute_ruler = nlp.add_pipe("attribute_ruler")
-> attribute_ruler.from_bytes(attribute_ruler_bytes)
+> ruler_bytes = ruler.to_bytes()
+> ruler = nlp.add_pipe("attribute_ruler")
+> ruler.from_bytes(ruler_bytes)
 > ```

 | Name | Description |
@@ -250,12 +271,12 @@ serialization by passing in the string names via the `exclude` argument.
 > #### Example
 >
 > ```python
-> data = attribute_ruler.to_disk("/path", exclude=["vocab"])
+> data = ruler.to_disk("/path", exclude=["vocab"])
 > ```

 | Name | Description |
-| ---------- | -------------------------------------------------------------- |
+| ---------- | --------------------------------------------------------------- |
 | `vocab` | The shared [`Vocab`](/api/vocab). |
 | `patterns` | The `Matcher` patterns. You usually don't want to exclude this. |
 | `attrs` | The attributes to set. You usually don't want to exclude this. |
 | `indices` | The token indices. You usually don't want to exclude this. |
@@ -1801,17 +1801,7 @@ print(doc2[5].tag_, doc2[5].pos_)  # WP PRON

 <Infobox variant="warning" title="Migrating from spaCy v2.x">

-For easy migration from from spaCy v2 to v3, the
-[`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules**
-in the v2 format with the methods
-[`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
-[`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules).
-
-```diff
- nlp = spacy.blank("en")
-+ ruler = nlp.add_pipe("attribute_ruler")
-+ ruler.load_from_tag_map(YOUR_TAG_MAP)
-```
+The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.

 </Infobox>

@@ -804,8 +804,30 @@ nlp = spacy.blank("en")
 Instead of defining a `tag_map` and `morph_rules` in the language data, spaCy
 v3.0 now manages mappings and exceptions with a separate and more flexible
 pipeline component, the [`AttributeRuler`](/api/attributeruler). See the
-[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. The
-`AttributeRuler` provides two handy helper methods
+[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. If
+you have tag maps and morph rules in the v2.x format, you can load them into the
+attribute ruler before training using the `[initialize]` block of your config.
+
+> #### What does the initialization do?
+>
+> The `[initialize]` block is used when
+> [`nlp.initialize`](/api/language#initialize) is called (usually right before
+> training). It lets you define data resources for initializing the pipeline in
+> your `config.cfg`. After training, the rules are saved to disk with the
+> exported pipeline, so your runtime model doesn't depend on local data. For
+> details see the [config lifecycle](/usage/training/#config-lifecycle) and
+> [initialization](/usage/training/#initialization) docs.
+
+```ini
+### config.cfg (excerpt)
+[initialize.components.attribute_ruler]
+
+[initialize.components.attribute_ruler.tag_map]
+@readers = "srsly.read_json.v1"
+path = "./corpus/tag_map.json"
+```
+
+The `AttributeRuler` also provides two handy helper methods
 [`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
 [`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules) that let
 you load in your existing tag map or morph rules:
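For projects that prefer code over config, the helper methods mentioned above allow the same migration to be done programmatically; a minimal sketch with stand-in v2.x data:

```python
import spacy

# Stand-ins for your existing v2.x mappings
tag_map = {".": {"POS": "PUNCT"}}
morph_rules = {"DT": {"the": {"POS": "DET", "LEMMA": "a", "Case": "Nom"}}}

nlp = spacy.blank("en")
ruler = nlp.add_pipe("attribute_ruler")
ruler.load_from_tag_map(tag_map)
ruler.load_from_morph_rules(morph_rules)
```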