mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
update NEL docs after latest refactor
This commit is contained in:
parent
4fa967ea84
commit
40276fd3be
|
@ -1,3 +1,4 @@
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional, Callable, Iterable
|
from typing import Optional, Callable, Iterable
|
||||||
from thinc.api import chain, clone, list2ragged, reduce_mean, residual
|
from thinc.api import chain, clone, list2ragged, reduce_mean, residual
|
||||||
from thinc.api import Model, Maxout, Linear
|
from thinc.api import Model, Maxout, Linear
|
||||||
|
@ -25,7 +26,7 @@ def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
|
||||||
|
|
||||||
|
|
||||||
@registry.misc.register("spacy.KBFromFile.v1")
|
@registry.misc.register("spacy.KBFromFile.v1")
|
||||||
def load_kb(kb_path: str) -> Callable[[Vocab], KnowledgeBase]:
|
def load_kb(kb_path: Path) -> Callable[[Vocab], KnowledgeBase]:
|
||||||
def kb_from_file(vocab):
|
def kb_from_file(vocab):
|
||||||
kb = KnowledgeBase(vocab, entity_vector_length=1)
|
kb = KnowledgeBase(vocab, entity_vector_length=1)
|
||||||
kb.from_disk(kb_path)
|
kb.from_disk(kb_path)
|
||||||
|
|
|
@ -637,13 +637,6 @@ into the "real world". This requires 3 main components:
|
||||||
> window_size = 1
|
> window_size = 1
|
||||||
> maxout_pieces = 3
|
> maxout_pieces = 3
|
||||||
> subword_features = true
|
> subword_features = true
|
||||||
>
|
|
||||||
> [kb_loader]
|
|
||||||
> @misc = "spacy.EmptyKB.v1"
|
|
||||||
> entity_vector_length = 64
|
|
||||||
>
|
|
||||||
> [get_candidates]
|
|
||||||
> @misc = "spacy.CandidateGenerator.v1"
|
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
The `EntityLinker` model architecture is a Thinc `Model` with a
|
The `EntityLinker` model architecture is a Thinc `Model` with a
|
||||||
|
@ -657,13 +650,21 @@ The `EntityLinker` model architecture is a Thinc `Model` with a
|
||||||
|
|
||||||
### spacy.EmptyKB.v1 {#EmptyKB}
|
### spacy.EmptyKB.v1 {#EmptyKB}
|
||||||
|
|
||||||
A function that creates a default, empty `KnowledgeBase` from a
|
A function that creates an empty `KnowledgeBase` from a [`Vocab`](/api/vocab)
|
||||||
[`Vocab`](/api/vocab) instance.
|
instance. This is the default when a new entity linker component is created.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------- | ----------------------------------------------------------------------------------- |
|
| ---------------------- | ----------------------------------------------------------------------------------- |
|
||||||
| `entity_vector_length` | The length of the vectors encoding each entity in the KB. Defaults to `64`. ~~int~~ |
|
| `entity_vector_length` | The length of the vectors encoding each entity in the KB. Defaults to `64`. ~~int~~ |
|
||||||
|
|
||||||
|
### spacy.KBFromFile.v1 {#KBFromFile}
|
||||||
|
|
||||||
|
A function that reads an existing `KnowledgeBase` from file.
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| --------- | -------------------------------------------------------- |
|
||||||
|
| `kb_path` | The location of the KB that was stored to file. ~~Path~~ |
|
||||||
|
|
||||||
### spacy.CandidateGenerator.v1 {#CandidateGenerator}
|
### spacy.CandidateGenerator.v1 {#CandidateGenerator}
|
||||||
|
|
||||||
A function that takes as input a [`KnowledgeBase`](/api/kb) and a
|
A function that takes as input a [`KnowledgeBase`](/api/kb) and a
|
||||||
|
|
|
@ -34,20 +34,20 @@ architectures and their arguments and hyperparameters.
|
||||||
> "incl_prior": True,
|
> "incl_prior": True,
|
||||||
> "incl_context": True,
|
> "incl_context": True,
|
||||||
> "model": DEFAULT_NEL_MODEL,
|
> "model": DEFAULT_NEL_MODEL,
|
||||||
> "kb_loader": {'@misc': 'spacy.EmptyKB.v1', 'entity_vector_length': 64},
|
> "entity_vector_length": 64,
|
||||||
> "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
|
> "get_candidates": {'@misc': 'spacy.CandidateGenerator.v1'},
|
||||||
> }
|
> }
|
||||||
> nlp.add_pipe("entity_linker", config=config)
|
> nlp.add_pipe("entity_linker", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| `labels_discard` | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~ |
|
| `labels_discard` | NER labels that will automatically get a "NIL" prediction. Defaults to `[]`. ~~Iterable[str]~~ |
|
||||||
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~ |
|
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. Defaults to `True`. ~~bool~~ |
|
||||||
| `incl_context` | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~ |
|
| `incl_context` | Whether or not to include the local context in the model. Defaults to `True`. ~~bool~~ |
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [EntityLinker](/api/architectures#EntityLinker). ~~Model~~ |
|
||||||
| `kb_loader` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. Defaults to [EmptyKB](/api/architectures#EmptyKB), a function returning an empty `KnowledgeBase` with an `entity_vector_length` of `64`. ~~Callable[[Vocab], KnowledgeBase]~~ |
|
| `entity_vector_length` | Size of encoding vectors in the KB. Defaults to `64`. ~~int~~ |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
||||||
|
@ -65,10 +65,6 @@ architectures and their arguments and hyperparameters.
|
||||||
> config = {"model": {"@architectures": "my_el.v1"}}
|
> config = {"model": {"@architectures": "my_el.v1"}}
|
||||||
> entity_linker = nlp.add_pipe("entity_linker", config=config)
|
> entity_linker = nlp.add_pipe("entity_linker", config=config)
|
||||||
>
|
>
|
||||||
> # Construction via add_pipe with custom KB and candidate generation
|
|
||||||
> config = {"kb": {"@misc": "my_kb.v1"}}
|
|
||||||
> entity_linker = nlp.add_pipe("entity_linker", config=config)
|
|
||||||
>
|
|
||||||
> # Construction from class
|
> # Construction from class
|
||||||
> from spacy.pipeline import EntityLinker
|
> from spacy.pipeline import EntityLinker
|
||||||
> entity_linker = EntityLinker(nlp.vocab, model)
|
> entity_linker = EntityLinker(nlp.vocab, model)
|
||||||
|
@ -76,21 +72,25 @@ architectures and their arguments and hyperparameters.
|
||||||
|
|
||||||
Create a new pipeline instance. In your application, you would normally use a
|
Create a new pipeline instance. In your application, you would normally use a
|
||||||
shortcut for this and instantiate the component using its string name and
|
shortcut for this and instantiate the component using its string name and
|
||||||
[`nlp.add_pipe`](/api/language#add_pipe). Note that both the internal
|
[`nlp.add_pipe`](/api/language#add_pipe).
|
||||||
`KnowledgeBase` as well as the Candidate generator can be customized by
|
|
||||||
providing custom registered functions.
|
|
||||||
|
|
||||||
| Name | Description |
|
Upon construction of the entity linker component, an empty knowledge base is
|
||||||
| ---------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
constructed with the provided `entity_vector_length`. If you want to use a
|
||||||
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
custom knowledge base, you should either call
|
||||||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~ |
|
[`set_kb`](/api/entitylinker#set_kb) or provide a `kb_loader` in the
|
||||||
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
[`initialize`](/api/entitylinker#initialize) call.
|
||||||
| _keyword-only_ | |
|
|
||||||
| `kb_loader` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. ~~Callable[[Vocab], KnowledgeBase]~~ |
|
| Name | Description |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `labels_discard` | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~ |
|
| `vocab` | The shared vocabulary. ~~Vocab~~ |
|
||||||
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. ~~bool~~ |
|
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model~~ |
|
||||||
| `incl_context` | Whether or not to include the local context in the model. ~~bool~~ |
|
| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
|
||||||
|
| _keyword-only_ | |
|
||||||
|
| `entity_vector_length` | Size of encoding vectors in the KB. ~~int~~ |
|
||||||
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
|
| `labels_discard` | NER labels that will automatically get a `"NIL"` prediction. ~~Iterable[str]~~ |
|
||||||
|
| `incl_prior` | Whether or not to include prior probabilities from the KB in the model. ~~bool~~ |
|
||||||
|
| `incl_context` | Whether or not to include the local context in the model. ~~bool~~ |
|
||||||
|
|
||||||
## EntityLinker.\_\_call\_\_ {#call tag="method"}
|
## EntityLinker.\_\_call\_\_ {#call tag="method"}
|
||||||
|
|
||||||
|
@ -139,6 +139,28 @@ applied to the `Doc` in order. Both [`__call__`](/api/entitylinker#call) and
|
||||||
| `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ |
|
| `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ |
|
||||||
| **YIELDS** | The processed documents in order. ~~Doc~~ |
|
| **YIELDS** | The processed documents in order. ~~Doc~~ |
|
||||||
|
|
||||||
|
## EntityLinker.set_kb {#set_kb tag="method" new="3"}
|
||||||
|
|
||||||
|
Set the knowledge base used by the component. The `kb_loader` should be a function that takes a `Vocab` instance and creates
|
||||||
|
the `KnowledgeBase`, ensuring that the strings of the knowledge base are synced
|
||||||
|
with the current vocab.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> def create_kb(vocab):
|
||||||
|
>     kb = KnowledgeBase(vocab, entity_vector_length=128)
|
||||||
|
>     kb.add_entity(...)
|
||||||
|
>     kb.add_alias(...)
|
||||||
|
>     return kb
|
||||||
|
> entity_linker = nlp.add_pipe("entity_linker")
|
||||||
|
> entity_linker.set_kb(create_kb)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ----------- | ---------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `kb_loader` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. ~~Callable[[Vocab], KnowledgeBase]~~ |
|
||||||
|
|
||||||
## EntityLinker.initialize {#initialize tag="method" new="3"}
|
## EntityLinker.initialize {#initialize tag="method" new="3"}
|
||||||
|
|
||||||
Initialize the component for training. `get_examples` should be a function that
|
Initialize the component for training. `get_examples` should be a function that
|
||||||
|
@ -150,6 +172,11 @@ network,
|
||||||
setting up the label scheme based on the data. This method is typically called
|
setting up the label scheme based on the data. This method is typically called
|
||||||
by [`Language.initialize`](/api/language#initialize).
|
by [`Language.initialize`](/api/language#initialize).
|
||||||
|
|
||||||
|
Optionally, a `kb_loader` argument may be specified to change the internal
|
||||||
|
knowledge base. This argument should be a function that takes a `Vocab` instance
|
||||||
|
and creates the `KnowledgeBase`, ensuring that the strings of the knowledge base
|
||||||
|
are synced with the current vocab.
|
||||||
|
|
||||||
<Infobox variant="warning" title="Changed in v3.0" id="begin_training">
|
<Infobox variant="warning" title="Changed in v3.0" id="begin_training">
|
||||||
|
|
||||||
This method was previously called `begin_training`.
|
This method was previously called `begin_training`.
|
||||||
|
@ -160,7 +187,7 @@ This method was previously called `begin_training`.
|
||||||
>
|
>
|
||||||
> ```python
|
> ```python
|
||||||
> entity_linker = nlp.add_pipe("entity_linker")
|
> entity_linker = nlp.add_pipe("entity_linker")
|
||||||
> entity_linker.initialize(lambda: [], nlp=nlp)
|
> entity_linker.initialize(lambda: [], nlp=nlp, kb_loader=my_kb)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
|
@ -168,6 +195,7 @@ This method was previously called `begin_training`.
|
||||||
| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. ~~Callable[[], Iterable[Example]]~~ |
|
| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. ~~Callable[[], Iterable[Example]]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
|
| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ |
|
||||||
|
| `kb_loader` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. ~~Callable[[Vocab], KnowledgeBase]~~ |
|
||||||
|
|
||||||
## EntityLinker.predict {#predict tag="method"}
|
## EntityLinker.predict {#predict tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user