Tidy up, tests and docs

This commit is contained in:
Ines Montani 2020-10-04 13:54:05 +02:00
parent 96b636c2d3
commit 11347f34da
5 changed files with 193 additions and 152 deletions

View File

@ -1,10 +1,11 @@
from typing import List, Dict, Union, Iterable, Any, Optional, Callable, Iterator
from typing import Tuple
import srsly
from typing import List, Dict, Union, Iterable, Any, Optional
from pathlib import Path
from .pipe import Pipe
from ..errors import Errors
from ..training import validate_examples
from ..training import validate_examples, Example
from ..language import Language
from ..matcher import Matcher
from ..scorer import Scorer
@ -22,17 +23,9 @@ TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]
@Language.factory(
"attribute_ruler", default_config={"validate": False}
)
def make_attribute_ruler(
nlp: Language,
name: str,
validate: bool,
):
return AttributeRuler(
nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
)
@Language.factory("attribute_ruler", default_config={"validate": False})
def make_attribute_ruler(nlp: Language, name: str, validate: bool):
    """Factory registered under "attribute_ruler": create an AttributeRuler
    that uses the vocab of the given pipeline.

    nlp (Language): The pipeline object; its vocab is handed to the ruler.
    name (str): The instance name of the component.
    validate (bool): Passed through to the AttributeRuler as `validate`.
    RETURNS (AttributeRuler): The newly constructed component.
    """
    ruler = AttributeRuler(nlp.vocab, name, validate=validate)
    return ruler
class AttributeRuler(Pipe):
@ -43,12 +36,7 @@ class AttributeRuler(Pipe):
"""
def __init__(
self,
vocab: Vocab,
name: str = "attribute_ruler",
*,
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
validate: bool = False,
self, vocab: Vocab, name: str = "attribute_ruler", *, validate: bool = False
) -> None:
"""Create the AttributeRuler. After creation, you can add patterns
with the `.initialize()` or `.add_patterns()` methods, or load patterns
@ -71,12 +59,12 @@ class AttributeRuler(Pipe):
def initialize(
self,
get_examples: Optional[Callable[[], Iterable[Example]]] = None,
get_examples: Optional[Callable[[], Iterable[Example]]],
*,
nlp: Optional[Language] = None,
patterns: Optional[Iterable[AttributeRulerPatternType]] = None,
tag_map: Optional[TagMapType]=None,
morph_rules: Optional[MorphRulesType]=None
tag_map: Optional[TagMapType] = None,
morph_rules: Optional[MorphRulesType] = None,
):
"""Initialize the attribute ruler by adding zero or more patterns.
@ -126,7 +114,7 @@ class AttributeRuler(Pipe):
set_token_attrs(span[index], attrs)
return doc
def pipe(self, stream, *, batch_size=128):
def pipe(self, stream: Iterable[Doc], *, batch_size: int = 128) -> Iterator[Doc]:
"""Apply the pipe to a stream of documents. This usually happens under
the hood when the nlp object is called on a text and all components are
applied to the Doc.
@ -210,16 +198,16 @@ class AttributeRuler(Pipe):
self.attrs.append(attrs)
self.indices.append(index)
def add_patterns(self, pattern_dicts: Iterable[AttributeRulerPatternType]) -> None:
def add_patterns(self, patterns: Iterable[AttributeRulerPatternType]) -> None:
"""Add patterns from a list of pattern dicts with the keys as the
arguments to AttributeRuler.add.
pattern_dicts (Iterable[dict]): A list of pattern dicts with the keys
patterns (Iterable[dict]): A list of pattern dicts with the keys
as the arguments to AttributeRuler.add (patterns/attrs/index) to
add as patterns.
DOCS: https://nightly.spacy.io/api/attributeruler#add_patterns
"""
for p in pattern_dicts:
for p in patterns:
self.add(**p)
@property
@ -234,7 +222,7 @@ class AttributeRuler(Pipe):
all_patterns.append(p)
return all_patterns
def score(self, examples, **kwargs):
def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
"""Score a batch of examples.
examples (Iterable[Example]): The examples to score.
@ -275,7 +263,7 @@ class AttributeRuler(Pipe):
def from_bytes(
self, bytes_data: bytes, exclude: Iterable[str] = SimpleFrozenList()
):
) -> "AttributeRuler":
"""Load the AttributeRuler from a bytestring.
bytes_data (bytes): The data to load.
@ -293,7 +281,6 @@ class AttributeRuler(Pipe):
"patterns": load_patterns,
}
util.from_bytes(bytes_data, deserialize, exclude)
return self
def to_disk(
@ -303,6 +290,7 @@ class AttributeRuler(Pipe):
path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
DOCS: https://nightly.spacy.io/api/attributeruler#to_disk
"""
serialize = {
@ -313,11 +301,13 @@ class AttributeRuler(Pipe):
def from_disk(
self, path: Union[Path, str], exclude: Iterable[str] = SimpleFrozenList()
) -> None:
) -> "AttributeRuler":
"""Load the AttributeRuler from disk.
path (Union[Path, str]): A path to a directory.
exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (AttributeRuler): The loaded object.
DOCS: https://nightly.spacy.io/api/attributeruler#from_disk
"""
@ -329,11 +319,10 @@ class AttributeRuler(Pipe):
"patterns": load_patterns,
}
util.from_disk(path, deserialize, exclude)
return self
def _split_morph_attrs(attrs):
def _split_morph_attrs(attrs: dict) -> Tuple[dict, dict]:
"""Split entries from a tag map or morph rules dict into two dicts, one
with the token-level features (POS, LEMMA) and one with the remaining
features, which are presumed to be individual MORPH features."""

View File

@ -63,6 +63,39 @@ def morph_rules():
return {"DT": {"the": {"POS": "DET", "LEMMA": "a", "Case": "Nom"}}}
def check_tag_map(ruler):
    """Apply `ruler` to a tagged sample Doc and assert that only the final
    "." token (index 4) received a POS tag and a morph analysis; every other
    token must be left with an empty POS and empty morph.
    """
    words = ["This", "is", "a", "test", "."]
    tags = ["DT", "VBZ", "DT", "NN", "."]
    doc = ruler(Doc(ruler.vocab, words=words, tags=tags))
    for i, token in enumerate(doc):
        if i == 4:
            expected_pos, expected_morph = "PUNCT", "PunctType=peri"
        else:
            expected_pos, expected_morph = "", ""
        assert token.pos_ == expected_pos
        assert str(token.morph) == expected_morph
def check_morph_rules(ruler):
    """Apply `ruler` to a tagged sample Doc and assert that only the token
    "the" (index 2) was annotated with POS, lemma and morph; every other
    token must be left with an empty POS and empty morph.
    """
    words = ["This", "is", "the", "test", "."]
    tags = ["DT", "VBZ", "DT", "NN", "."]
    doc = ruler(Doc(ruler.vocab, words=words, tags=tags))
    for i, token in enumerate(doc):
        if i == 2:
            assert token.pos_ == "DET"
            assert token.lemma_ == "a"
            assert str(token.morph) == "Case=Nom"
            continue
        # All remaining tokens are expected to be untouched by the rules.
        assert token.pos_ == ""
        assert str(token.morph) == ""
def test_attributeruler_init(nlp, pattern_dicts):
a = nlp.add_pipe("attribute_ruler")
for p in pattern_dicts:
@ -78,7 +111,8 @@ def test_attributeruler_init(nlp, pattern_dicts):
def test_attributeruler_init_patterns(nlp, pattern_dicts):
# initialize with patterns
nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
ruler = nlp.add_pipe("attribute_ruler")
ruler.initialize(lambda: [], patterns=pattern_dicts)
doc = nlp("This is a test.")
assert doc[2].lemma_ == "the"
assert str(doc[2].morph) == "Case=Nom|Number=Plur"
@ -88,10 +122,11 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
assert doc.has_annotation("MORPH")
nlp.remove_pipe("attribute_ruler")
# initialize with patterns from asset
nlp.add_pipe(
"attribute_ruler",
config={"pattern_dicts": {"@misc": "attribute_ruler_patterns"}},
)
nlp.config["initialize"]["components"]["attribute_ruler"] = {
"patterns": {"@misc": "attribute_ruler_patterns"}
}
nlp.add_pipe("attribute_ruler")
nlp.initialize()
doc = nlp("This is a test.")
assert doc[2].lemma_ == "the"
assert str(doc[2].morph) == "Case=Nom|Number=Plur"
@ -103,18 +138,15 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
def test_attributeruler_score(nlp, pattern_dicts):
# initialize with patterns
nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
ruler = nlp.add_pipe("attribute_ruler")
ruler.initialize(lambda: [], patterns=pattern_dicts)
doc = nlp("This is a test.")
assert doc[2].lemma_ == "the"
assert str(doc[2].morph) == "Case=Nom|Number=Plur"
assert doc[3].lemma_ == "cat"
assert str(doc[3].morph) == "Case=Nom|Number=Sing"
dev_examples = [
Example.from_dict(
nlp.make_doc("This is a test."), {"lemmas": ["this", "is", "a", "cat", "."]}
)
]
doc = nlp.make_doc("This is a test.")
dev_examples = [Example.from_dict(doc, {"lemmas": ["this", "is", "a", "cat", "."]})]
scores = nlp.evaluate(dev_examples)
# "cat" is the only correct lemma
assert scores["lemma_acc"] == pytest.approx(0.2)
@ -139,40 +171,27 @@ def test_attributeruler_rule_order(nlp):
def test_attributeruler_tag_map(nlp, tag_map):
a = AttributeRuler(nlp.vocab)
a.load_from_tag_map(tag_map)
doc = Doc(
nlp.vocab,
words=["This", "is", "a", "test", "."],
tags=["DT", "VBZ", "DT", "NN", "."],
)
doc = a(doc)
for i in range(len(doc)):
if i == 4:
assert doc[i].pos_ == "PUNCT"
assert str(doc[i].morph) == "PunctType=peri"
else:
assert doc[i].pos_ == ""
assert str(doc[i].morph) == ""
ruler = AttributeRuler(nlp.vocab)
ruler.load_from_tag_map(tag_map)
check_tag_map(ruler)
def test_attributeruler_tag_map_initialize(nlp, tag_map):
    """A tag map supplied through `initialize` must pass the shared tag map
    checks."""
    component = nlp.add_pipe("attribute_ruler")
    # The first argument is a get_examples callback; here it yields nothing.
    component.initialize(lambda: [], tag_map=tag_map)
    check_tag_map(component)
def test_attributeruler_morph_rules(nlp, morph_rules):
a = AttributeRuler(nlp.vocab)
a.load_from_morph_rules(morph_rules)
doc = Doc(
nlp.vocab,
words=["This", "is", "the", "test", "."],
tags=["DT", "VBZ", "DT", "NN", "."],
)
doc = a(doc)
for i in range(len(doc)):
if i != 2:
assert doc[i].pos_ == ""
assert str(doc[i].morph) == ""
else:
assert doc[2].pos_ == "DET"
assert doc[2].lemma_ == "a"
assert str(doc[2].morph) == "Case=Nom"
ruler = AttributeRuler(nlp.vocab)
ruler.load_from_morph_rules(morph_rules)
check_morph_rules(ruler)
def test_attributeruler_morph_rules_initialize(nlp, morph_rules):
    """Morph rules supplied through `initialize` must pass the shared morph
    rules checks."""
    component = nlp.add_pipe("attribute_ruler")
    # The first argument is a get_examples callback; here it yields nothing.
    component.initialize(lambda: [], morph_rules=morph_rules)
    check_morph_rules(component)
def test_attributeruler_indices(nlp):

View File

@ -4,6 +4,7 @@ tag: class
source: spacy/pipeline/attributeruler.py
new: 3
teaser: 'Pipeline component for rule-based token attribute assignment'
api_base_class: /api/pipe
api_string_name: attribute_ruler
api_trainable: false
---
@ -25,17 +26,13 @@ how the component should be configured. You can override its settings via the
> #### Example
>
> ```python
> config = {
> "pattern_dicts": None,
> "validate": True,
> }
> config = {"validate": True}
> nlp.add_pipe("attribute_ruler", config=config)
> ```
| Setting | Description |
| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `pattern_dicts` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |
| Setting | Description |
| ---------- | --------------------------------------------------------------------------------------------- |
| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |
```python
%%GITHUB_SPACY/spacy/pipeline/attributeruler.py
@ -43,36 +40,26 @@ how the component should be configured. You can override its settings via the
## AttributeRuler.\_\_init\_\_ {#init tag="method"}
Initialize the attribute ruler. If pattern dicts are supplied here, they need to
be a list of dictionaries with `"patterns"`, `"attrs"`, and optional `"index"`
keys, e.g.:
```python
pattern_dicts = [
{"patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}},
{"patterns": [[{"LOWER": "an"}]], "attrs": {"LEMMA": "a"}},
]
```
Initialize the attribute ruler.
> #### Example
>
> ```python
> # Construction via add_pipe
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> ruler = nlp.add_pipe("attribute_ruler")
> ```
| Name | Description |
| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
| _keyword-only_ | |
| `pattern_dicts` | Optional patterns to load in on initialization. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
| `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |
| Name | Description |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| `vocab` | The shared vocabulary to pass to the matcher. ~~Vocab~~ |
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. ~~str~~ |
| _keyword-only_ | |
| `validate` | Whether patterns should be validated (passed to the [`Matcher`](/api/matcher#init)). Defaults to `False`. ~~bool~~ |
## AttributeRuler.\_\_call\_\_ {#call tag="method"}
Apply the attribute ruler to a `Doc`, setting token attributes for tokens matched
by the provided patterns.
Apply the attribute ruler to a `Doc`, setting token attributes for tokens
matched by the provided patterns.
| Name | Description |
| ----------- | -------------------------------- |
@ -90,10 +77,10 @@ may be negative to index from the end of the span.
> #### Example
>
> ```python
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> ruler = nlp.add_pipe("attribute_ruler")
> patterns = [[{"TAG": "VB"}]]
> attrs = {"POS": "VERB"}
> attribute_ruler.add(patterns=patterns, attrs=attrs)
> ruler.add(patterns=patterns, attrs=attrs)
> ```
| Name | Description |
@ -107,11 +94,10 @@ may be negative to index from the end of the span.
> #### Example
>
> ```python
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> pattern_dicts = [
> ruler = nlp.add_pipe("attribute_ruler")
> patterns = [
> {
> "patterns": [[{"TAG": "VB"}]],
> "attrs": {"POS": "VERB"}
> "patterns": [[{"TAG": "VB"}]], "attrs": {"POS": "VERB"}
> },
> {
> "patterns": [[{"LOWER": "two"}, {"LOWER": "apples"}]],
@ -119,15 +105,16 @@ may be negative to index from the end of the span.
> "index": -1
> },
> ]
> attribute_ruler.add_patterns(pattern_dicts)
> ruler.add_patterns(patterns)
> ```
Add patterns from a list of pattern dicts with the keys as the arguments to
Add patterns from a list of pattern dicts. Each pattern dict can specify the
keys `"patterns"`, `"attrs"` and `"index"`, which match the arguments of
[`AttributeRuler.add`](/api/attributeruler#add).
| Name | Description |
| --------------- | -------------------------------------------------------------------------- |
| `pattern_dicts` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |
| Name | Description |
| ---------- | -------------------------------------------------------------------------- |
| `patterns` | The patterns to add. ~~Iterable[Dict[str, Union[List[dict], dict, int]]]~~ |
## AttributeRuler.patterns {#patterns tag="property"}
@ -139,20 +126,39 @@ Get all patterns that have been added to the attribute ruler in the
| ----------- | -------------------------------------------------------------------------------------------- |
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
## AttributeRuler.score {#score tag="method" new="3"}
## AttributeRuler.initialize {#initialize tag="method"}
Score a batch of examples.
Initialize the component with data. Typically called before training to load in
rules from a file. This method is typically called by
[`Language.initialize`](/api/language#initialize) and lets you customize
arguments it receives via the
[`[initialize.components]`](/api/data-formats#config-initialize) block in the
config.
> #### Example
>
> ```python
> scores = attribute_ruler.score(examples)
> ruler = nlp.add_pipe("attribute_ruler")
> ruler.initialize(lambda: [], nlp=nlp, patterns=patterns)
> ```
>
> ```ini
> ### config.cfg
> [initialize.components.attribute_ruler]
>
> [initialize.components.attribute_ruler.patterns]
> @readers = "srsly.read_json.v1"
> path = "corpus/attribute_ruler_patterns.json"
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
| Name | Description |
| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects (the training data). Not used by this component. ~~Callable[[], Iterable[Example]]~~ |
| _keyword-only_ | |
| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
| `patterns` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](/api/attributeruler#add) (`patterns`/`attrs`/`index`) to add as patterns. Defaults to `None`. ~~Optional[Iterable[Dict[str, Union[List[dict], dict, int]]]]~~ |
| `tag_map` | The tag map that maps fine-grained tags to coarse-grained tags and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |
| `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]]~~ |
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
@ -170,6 +176,21 @@ Load attribute ruler patterns from morph rules.
| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `morph_rules` | The morph rules that map token text and fine-grained tags to coarse-grained tags, lemmas and morphological features. ~~Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]~~ |
## AttributeRuler.score {#score tag="method" new="3"}
Score a batch of examples.
> #### Example
>
> ```python
> scores = ruler.score(examples)
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `examples` | The examples to score. ~~Iterable[Example]~~ |
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
## AttributeRuler.to_disk {#to_disk tag="method"}
Serialize the pipe to disk.
@ -177,8 +198,8 @@ Serialize the pipe to disk.
> #### Example
>
> ```python
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> attribute_ruler.to_disk("/path/to/attribute_ruler")
> ruler = nlp.add_pipe("attribute_ruler")
> ruler.to_disk("/path/to/attribute_ruler")
> ```
| Name | Description |
@ -194,8 +215,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
> #### Example
>
> ```python
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> attribute_ruler.from_disk("/path/to/attribute_ruler")
> ruler = nlp.add_pipe("attribute_ruler")
> ruler.from_disk("/path/to/attribute_ruler")
> ```
| Name | Description |
@ -210,8 +231,8 @@ Load the pipe from disk. Modifies the object in place and returns it.
> #### Example
>
> ```python
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> attribute_ruler_bytes = attribute_ruler.to_bytes()
> ruler = nlp.add_pipe("attribute_ruler")
> ruler_bytes = ruler.to_bytes()
> ```
Serialize the pipe to a bytestring.
@ -229,9 +250,9 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
> #### Example
>
> ```python
> attribute_ruler_bytes = attribute_ruler.to_bytes()
> attribute_ruler = nlp.add_pipe("attribute_ruler")
> attribute_ruler.from_bytes(attribute_ruler_bytes)
> ruler_bytes = ruler.to_bytes()
> ruler = nlp.add_pipe("attribute_ruler")
> ruler.from_bytes(ruler_bytes)
> ```
| Name | Description |
@ -250,12 +271,12 @@ serialization by passing in the string names via the `exclude` argument.
> #### Example
>
> ```python
> data = attribute_ruler.to_disk("/path", exclude=["vocab"])
> data = ruler.to_disk("/path", exclude=["vocab"])
> ```
| Name | Description |
| ---------- | -------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `patterns` | The `Matcher` patterns. You usually don't want to exclude this. |
| `attrs` | The attributes to set. You usually don't want to exclude this. |
| `indices` | The token indices. You usually don't want to exclude this. |
| Name | Description |
| ---------- | --------------------------------------------------------------- |
| `vocab` | The shared [`Vocab`](/api/vocab). |
| `patterns` | The `Matcher` patterns. You usually don't want to exclude this. |
| `attrs` | The attributes to set. You usually don't want to exclude this. |
| `indices` | The token indices. You usually don't want to exclude this. |

View File

@ -1801,17 +1801,7 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON
<Infobox variant="warning" title="Migrating from spaCy v2.x">
For easy migration from spaCy v2 to v3, the
[`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules**
in the v2 format with the methods
[`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
[`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules).
```diff
nlp = spacy.blank("en")
+ ruler = nlp.add_pipe("attribute_ruler")
+ ruler.load_from_tag_map(YOUR_TAG_MAP)
```
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
</Infobox>

View File

@ -804,8 +804,30 @@ nlp = spacy.blank("en")
Instead of defining a `tag_map` and `morph_rules` in the language data, spaCy
v3.0 now manages mappings and exceptions with a separate and more flexible
pipeline component, the [`AttributeRuler`](/api/attributeruler). See the
[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. The
`AttributeRuler` provides two handy helper methods
[usage guide](/usage/linguistic-features#mappings-exceptions) for examples. If
you have tag maps and morph rules in the v2.x format, you can load them into the
attribute ruler before training using the `[initialize]` block of your config.
> #### What does the initialization do?
>
> The `[initialize]` block is used when
> [`nlp.initialize`](/api/language#initialize) is called (usually right before
> training). It lets you define data resources for initializing the pipeline in
> your `config.cfg`. After training, the rules are saved to disk with the
> exported pipeline, so your runtime model doesn't depend on local data. For
> details see the [config lifecycle](/usage/training/#config-lifecycle) and
> [initialization](/usage/training/#initialization) docs.
```ini
### config.cfg (excerpt)
[initialize.components.attribute_ruler]
[initialize.components.attribute_ruler.tag_map]
@readers = "srsly.read_json.v1"
path = "./corpus/tag_map.json"
```
The `AttributeRuler` also provides two handy helper methods
[`load_from_tag_map`](/api/attributeruler#load_from_tag_map) and
[`load_from_morph_rules`](/api/attributeruler#load_from_morph_rules) that let
you load in your existing tag map or morph rules: