registry.assets -> registry.misc

Ines Montani 2020-09-03 17:31:14 +02:00
parent c063e55eb7
commit 5afe6447cd
13 changed files with 60 additions and 54 deletions

View File

@@ -24,7 +24,7 @@ def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
     return model


-@registry.assets.register("spacy.KBFromFile.v1")
+@registry.misc.register("spacy.KBFromFile.v1")
 def load_kb(kb_path: str) -> Callable[[Vocab], KnowledgeBase]:
     def kb_from_file(vocab):
         kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -34,7 +34,7 @@ def load_kb(kb_path: str) -> Callable[[Vocab], KnowledgeBase]:
     return kb_from_file


-@registry.assets.register("spacy.EmptyKB.v1")
+@registry.misc.register("spacy.EmptyKB.v1")
 def empty_kb(entity_vector_length: int) -> Callable[[Vocab], KnowledgeBase]:
     def empty_kb_factory(vocab):
         return KnowledgeBase(vocab=vocab, entity_vector_length=entity_vector_length)
@@ -42,6 +42,6 @@ def empty_kb(entity_vector_length: int) -> Callable[[Vocab], KnowledgeBase]:
     return empty_kb_factory


-@registry.assets.register("spacy.CandidateGenerator.v1")
+@registry.misc.register("spacy.CandidateGenerator.v1")
 def create_candidates() -> Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]:
     return get_candidates
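These registered loaders are referenced from component configs via the renamed `@misc` key. A minimal sketch of that usage, based on the tests and defaults in this commit (not part of the diff itself):

```python
# Sketch: resolving an "@misc" reference when a component is created.
# spacy.EmptyKB.v1 needs no files on disk; spacy.KBFromFile.v1 would take a
# kb_path argument pointing to a serialized KB instead.
import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "entity_linker",
    config={"kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 64}},
)
```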

View File

@@ -39,12 +39,12 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
     requires=["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
     assigns=["token.ent_kb_id"],
     default_config={
-        "kb_loader": {"@assets": "spacy.EmptyKB.v1", "entity_vector_length": 64},
+        "kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 64},
         "model": DEFAULT_NEL_MODEL,
         "labels_discard": [],
         "incl_prior": True,
         "incl_context": True,
-        "get_candidates": {"@assets": "spacy.CandidateGenerator.v1"},
+        "get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
     },
 )
 def make_entity_linker(

View File

@@ -14,7 +14,7 @@ LANGUAGES = ["el", "en", "fr", "nl"]

 @pytest.mark.parametrize("lang", LANGUAGES)
 def test_lemmatizer_initialize(lang, capfd):
-    @registry.assets("lemmatizer_init_lookups")
+    @registry.misc("lemmatizer_init_lookups")
     def lemmatizer_init_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -25,9 +25,7 @@ def test_lemmatizer_initialize(lang, capfd):

     """Test that languages can be initialized."""
     nlp = get_lang_class(lang)()
-    nlp.add_pipe(
-        "lemmatizer", config={"lookups": {"@assets": "lemmatizer_init_lookups"}}
-    )
+    nlp.add_pipe("lemmatizer", config={"lookups": {"@misc": "lemmatizer_init_lookups"}})
     # Check for stray print statements (see #3342)
     doc = nlp("test")  # noqa: F841
     captured = capfd.readouterr()

View File

@@ -31,7 +31,7 @@ def pattern_dicts():
     ]


-@registry.assets("attribute_ruler_patterns")
+@registry.misc("attribute_ruler_patterns")
 def attribute_ruler_patterns():
     return [
         {
@@ -86,7 +86,7 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
     # initialize with patterns from asset
     nlp.add_pipe(
         "attribute_ruler",
-        config={"pattern_dicts": {"@assets": "attribute_ruler_patterns"}},
+        config={"pattern_dicts": {"@misc": "attribute_ruler_patterns"}},
     )
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"

View File

@@ -137,7 +137,7 @@ def test_kb_undefined(nlp):

 def test_kb_empty(nlp):
     """Test that the EL can't train with an empty KB"""
-    config = {"kb_loader": {"@assets": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
+    config = {"kb_loader": {"@misc": "spacy.EmptyKB.v1", "entity_vector_length": 342}}
     entity_linker = nlp.add_pipe("entity_linker", config=config)
     assert len(entity_linker.kb) == 0
     with pytest.raises(ValueError):
@@ -183,7 +183,7 @@ def test_el_pipe_configuration(nlp):
     ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns([pattern])

-    @registry.assets.register("myAdamKB.v1")
+    @registry.misc.register("myAdamKB.v1")
     def mykb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -199,7 +199,7 @@ def test_el_pipe_configuration(nlp):
     # run an EL pipe without a trained context encoder, to check the candidate generation step only
     nlp.add_pipe(
         "entity_linker",
-        config={"kb_loader": {"@assets": "myAdamKB.v1"}, "incl_context": False},
+        config={"kb_loader": {"@misc": "myAdamKB.v1"}, "incl_context": False},
     )
     # With the default get_candidates function, matching is case-sensitive
     text = "Douglas and douglas are not the same."
@@ -211,7 +211,7 @@ def test_el_pipe_configuration(nlp):
     def get_lowercased_candidates(kb, span):
         return kb.get_alias_candidates(span.text.lower())

-    @registry.assets.register("spacy.LowercaseCandidateGenerator.v1")
+    @registry.misc.register("spacy.LowercaseCandidateGenerator.v1")
     def create_candidates() -> Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]:
         return get_lowercased_candidates
@@ -220,9 +220,9 @@ def test_el_pipe_configuration(nlp):
         "entity_linker",
         "entity_linker",
         config={
-            "kb_loader": {"@assets": "myAdamKB.v1"},
+            "kb_loader": {"@misc": "myAdamKB.v1"},
             "incl_context": False,
-            "get_candidates": {"@assets": "spacy.LowercaseCandidateGenerator.v1"},
+            "get_candidates": {"@misc": "spacy.LowercaseCandidateGenerator.v1"},
         },
     )
     doc = nlp(text)
@@ -282,7 +282,7 @@ def test_append_invalid_alias(nlp):

 def test_preserving_links_asdoc(nlp):
     """Test that Span.as_doc preserves the existing entity links"""
-    @registry.assets.register("myLocationsKB.v1")
+    @registry.misc.register("myLocationsKB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             mykb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -304,7 +304,7 @@ def test_preserving_links_asdoc(nlp):
     ]
     ruler = nlp.add_pipe("entity_ruler")
     ruler.add_patterns(patterns)
-    el_config = {"kb_loader": {"@assets": "myLocationsKB.v1"}, "incl_prior": False}
+    el_config = {"kb_loader": {"@misc": "myLocationsKB.v1"}, "incl_prior": False}
     el_pipe = nlp.add_pipe("entity_linker", config=el_config, last=True)
     el_pipe.begin_training(lambda: [])
     el_pipe.incl_context = False
@@ -387,7 +387,7 @@ def test_overfitting_IO():
         doc = nlp(text)
         train_examples.append(Example.from_dict(doc, annotation))

-    @registry.assets.register("myOverfittingKB.v1")
+    @registry.misc.register("myOverfittingKB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             # create artificial KB - assign same prior weight to the two russ cochran's
@@ -408,7 +408,7 @@ def test_overfitting_IO():
     # Create the Entity Linker component and add it to the pipeline
     nlp.add_pipe(
         "entity_linker",
-        config={"kb_loader": {"@assets": "myOverfittingKB.v1"}},
+        config={"kb_loader": {"@misc": "myOverfittingKB.v1"}},
         last=True,
     )

View File

@@ -13,7 +13,7 @@ def nlp():

 @pytest.fixture
 def lemmatizer(nlp):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -23,13 +23,13 @@ def lemmatizer(nlp):
         return lookups

     lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
     )
     return lemmatizer


 def test_lemmatizer_init(nlp):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -39,7 +39,7 @@ def test_lemmatizer_init(nlp):
         return lookups

     lemmatizer = nlp.add_pipe(
-        "lemmatizer", config={"mode": "lookup", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "lookup", "lookups": {"@misc": "cope_lookups"}}
     )
     assert isinstance(lemmatizer.lookups, Lookups)
     assert lemmatizer.mode == "lookup"
@@ -51,14 +51,14 @@ def test_lemmatizer_init(nlp):
     nlp.remove_pipe("lemmatizer")

-    @registry.assets("empty_lookups")
+    @registry.misc("empty_lookups")
     def empty_lookups():
         return Lookups()

     with pytest.raises(ValueError):
         nlp.add_pipe(
             "lemmatizer",
-            config={"mode": "lookup", "lookups": {"@assets": "empty_lookups"}},
+            config={"mode": "lookup", "lookups": {"@misc": "empty_lookups"}},
         )
@@ -79,7 +79,7 @@ def test_lemmatizer_config(nlp, lemmatizer):

 def test_lemmatizer_serialize(nlp, lemmatizer):
-    @registry.assets("cope_lookups")
+    @registry.misc("cope_lookups")
     def cope_lookups():
         lookups = Lookups()
         lookups.add_table("lemma_lookup", {"cope": "cope"})
@@ -90,7 +90,7 @@ def test_lemmatizer_serialize(nlp, lemmatizer):

     nlp2 = English()
     lemmatizer2 = nlp2.add_pipe(
-        "lemmatizer", config={"mode": "rule", "lookups": {"@assets": "cope_lookups"}}
+        "lemmatizer", config={"mode": "rule", "lookups": {"@misc": "cope_lookups"}}
     )
     lemmatizer2.from_bytes(lemmatizer.to_bytes())
     assert lemmatizer.to_bytes() == lemmatizer2.to_bytes()

View File

@@ -71,7 +71,7 @@ def tagger():
 def entity_linker():
     nlp = Language()

-    @registry.assets.register("TestIssue5230KB.v1")
+    @registry.misc.register("TestIssue5230KB.v1")
     def dummy_kb() -> Callable[["Vocab"], KnowledgeBase]:
         def create_kb(vocab):
             kb = KnowledgeBase(vocab, entity_vector_length=1)
@@ -80,7 +80,7 @@ def entity_linker():
         return create_kb

-    config = {"kb_loader": {"@assets": "TestIssue5230KB.v1"}}
+    config = {"kb_loader": {"@misc": "TestIssue5230KB.v1"}}
     entity_linker = nlp.add_pipe("entity_linker", config=config)
     # need to add model for two reasons:
     # 1. no model leads to error in serialization,

View File

@@ -85,7 +85,7 @@ def test_serialize_subclassed_kb():
             super().__init__(vocab, entity_vector_length)
             self.custom_field = custom_field

-    @registry.assets.register("spacy.CustomKB.v1")
+    @registry.misc.register("spacy.CustomKB.v1")
     def custom_kb(
         entity_vector_length: int, custom_field: int
     ) -> Callable[["Vocab"], KnowledgeBase]:
@@ -101,7 +101,7 @@ def test_serialize_subclassed_kb():
     nlp = English()
     config = {
         "kb_loader": {
-            "@assets": "spacy.CustomKB.v1",
+            "@misc": "spacy.CustomKB.v1",
             "entity_vector_length": 342,
             "custom_field": 666,
         }

View File

@@ -76,7 +76,7 @@ class registry(thinc.registry):
     lemmatizers = catalogue.create("spacy", "lemmatizers", entry_points=True)
     lookups = catalogue.create("spacy", "lookups", entry_points=True)
     displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True)
-    assets = catalogue.create("spacy", "assets", entry_points=True)
+    misc = catalogue.create("spacy", "misc", entry_points=True)
     # Callback functions used to manipulate nlp object etc.
     callbacks = catalogue.create("spacy", "callbacks")
     batchers = catalogue.create("spacy", "batchers", entry_points=True)
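The renamed registry behaves like every other `catalogue` registry on this class: functions are registered by name and can be fetched back. A quick sketch (the registered name here is made up for illustration):

```python
from spacy.util import registry


@registry.misc("my_data.v1")  # hypothetical name, for illustration only
def make_data():
    return {"answer": 42}


# catalogue registries support lookup by name
create_data = registry.misc.get("my_data.v1")
assert create_data() == {"answer": 42}
```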

View File

@@ -673,11 +673,11 @@ into the "real world". This requires 3 main components:
 > subword_features = true
 >
 > [kb_loader]
-> @assets = "spacy.EmptyKB.v1"
+> @misc = "spacy.EmptyKB.v1"
 > entity_vector_length = 64
 >
 > [get_candidates]
-> @assets = "spacy.CandidateGenerator.v1"
+> @misc = "spacy.CandidateGenerator.v1"
 > ```

 The `EntityLinker` model architecture is a Thinc `Model` with a

View File

@@ -34,8 +34,8 @@ architectures and their arguments and hyperparameters.
 >     "incl_prior": True,
 >     "incl_context": True,
 >     "model": DEFAULT_NEL_MODEL,
->     "kb_loader": {'@assets': 'spacy.EmptyKB.v1', 'entity_vector_length': 64},
->     "get_candidates": {'@assets': 'spacy.CandidateGenerator.v1'},
+>     "kb_loader": {'@misc': 'spacy.EmptyKB.v1', 'entity_vector_length': 64},
+>     "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
 > }
 > nlp.add_pipe("entity_linker", config=config)
 > ```
@@ -66,7 +66,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py
 > entity_linker = nlp.add_pipe("entity_linker", config=config)
 >
 > # Construction via add_pipe with custom KB and candidate generation
-> config = {"kb": {"@assets": "my_kb.v1"}}
+> config = {"kb": {"@misc": "my_kb.v1"}}
 > entity_linker = nlp.add_pipe("entity_linker", config=config)
 >
 > # Construction from class

View File

@@ -307,7 +307,6 @@ factories.
 | Registry name     | Description |
 | ----------------- | ----------- |
 | `architectures`   | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`. |
-| `assets`          | Registry for data assets, knowledge bases etc. |
 | `batchers`        | Registry for training and evaluation [data batchers](#batchers). |
 | `callbacks`       | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training. |
 | `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points). |
@@ -322,6 +321,7 @@ factories.
 | `readers`         | Registry for training and evaluation data readers like [`Corpus`](/api/corpus). |
 | `schedules`       | Registry for functions that create [schedules](https://thinc.ai/docs/api-schedules). |
 | `tokenizers`      | Registry for tokenizer factories. Registered functions should return a callback that receives the `nlp` object and returns a [`Tokenizer`](/api/tokenizer) or a custom callable. |
+| `misc`            | Registry for miscellaneous functions that return data assets, knowledge bases or anything else you may need. |

 ### spacy-transformers registry {#registry-transformers}

View File

@@ -842,12 +842,20 @@ load and train custom pipelines with custom components. A simple solution is to
 **register a function** that returns your resources. The
 [registry](/api/top-level#registry) lets you **map string names to functions**
 that create objects, so given a name and optional arguments, spaCy will know how
-to recreate the object. To register a function that returns a custom asset, you
-can use the `@spacy.registry.assets` decorator with a single argument, the name:
+to recreate the object. To register a function that returns your custom
+dictionary, you can use the `@spacy.registry.misc` decorator with a single
+argument, the name:
+
+> #### What's the misc registry?
+>
+> The [`registry`](/api/top-level#registry) provides different categories for
+> different types of functions, such as model architectures, tokenizers or
+> batchers. `misc` is intended for miscellaneous functions that don't fit
+> anywhere else.

 ```python
 ### Registered function for assets {highlight="1"}
-@spacy.registry.assets("acronyms.slang_dict.v1")
+@spacy.registry.misc("acronyms.slang_dict.v1")
 def create_acronyms_slang_dict():
     dictionary = {"lol": "laughing out loud", "brb": "be right back"}
     dictionary.update({value: key for key, value in dictionary.items()})
@@ -856,9 +864,9 @@ def create_acronyms_slang_dict():

 In your `default_config` (and later in your
 [training config](/usage/training#config)), you can now refer to the function
-registered under the name `"acronyms.slang_dict.v1"` using the `@assets` key.
-This tells spaCy how to create the value, and when your component is created,
-the result of the registered function is passed in as the key `"dictionary"`.
+registered under the name `"acronyms.slang_dict.v1"` using the `@misc` key. This
+tells spaCy how to create the value, and when your component is created, the
+result of the registered function is passed in as the key `"dictionary"`.

 > #### config.cfg
 >
@@ -867,22 +875,22 @@ the result of the registered function is passed in as the key `"dictionary"`.
 > factory = "acronyms"
 >
 > [components.acronyms.dictionary]
-> @assets = "acronyms.slang_dict.v1"
+> @misc = "acronyms.slang_dict.v1"
 > ```

 ```diff
 - default_config = {"dictionary": DICTIONARY}
-+ default_config = {"dictionary": {"@assets": "acronyms.slang_dict.v1"}}
++ default_config = {"dictionary": {"@misc": "acronyms.slang_dict.v1"}}
 ```

 Using a registered function also means that you can easily include your custom
 components in pipelines that you [train](/usage/training). To make sure spaCy
-knows where to find your custom `@assets` function, you can pass in a Python
-file via the argument `--code`. If someone else is using your component, all
-they have to do to customize the data is to register their own function and swap
-out the name. Registered functions can also take **arguments** by the way that
-can be defined in the config as well you can read more about this in the docs
-on [training with custom code](/usage/training#custom-code).
+knows where to find your custom `@misc` function, you can pass in a Python file
+via the argument `--code`. If someone else is using your component, all they
+have to do to customize the data is to register their own function and swap out
+the name. Registered functions can also take **arguments**, by the way, which
+can be defined in the config as well; you can read more about this in the docs
+on [training with custom code](/usage/training#custom-code).

 ### Python type hints and pydantic validation {#type-hints new="3"}
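As a sketch of the `--code` workflow described above (the file name and the assumption that a `config.cfg` references the function are illustrative, not part of the commit): the registered `@misc` function lives in a regular Python module, and passing that module to spaCy makes the name resolvable.

```python
# functions.py - hypothetical module passed to spaCy via --code so the
# "@misc" reference in the training config can be resolved
import spacy


@spacy.registry.misc("acronyms.slang_dict.v1")
def create_acronyms_slang_dict():
    dictionary = {"lol": "laughing out loud", "brb": "be right back"}
    dictionary.update({value: key for key, value in dictionary.items()})
    return dictionary
```

Training then picks up the registration with `python -m spacy train config.cfg --code functions.py`.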