From f396f091dc256827031392ece21a165048870b21 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 6 Aug 2020 16:40:48 +0200 Subject: [PATCH] update EL API --- website/docs/api/entitylinker.md | 46 ++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/website/docs/api/entitylinker.md b/website/docs/api/entitylinker.md index 2708b9928..652574d15 100644 --- a/website/docs/api/entitylinker.md +++ b/website/docs/api/entitylinker.md @@ -33,14 +33,14 @@ architectures and their arguments and hyperparameters. > nlp.add_pipe("entity_linker", config=config) > ``` -| Setting | Type | Description | Default | -| ---------------- | -------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------------ | -| `labels_discard` | `Iterable[str]` | NER labels that will automatically get a "NIL" prediction. | `[]` | -| `incl_prior` | bool | Whether or not to include prior probabilities from the KB in the model. | `True` | -| `incl_context` | bool | Whether or not to include the local context in the model. | `True` | -| `model` | [`Model`](https://thinc.ai/docs/api-model) | The model to use. | [EntityLinker](/api/architectures#EntityLinker) | -| `kb_loader` | `Callable[[Vocab], KnowledgeBase]` | Function that creates a [`KnowledgeBase`](/api/kb) from a Vocab instance. | An empty KnowledgeBase with `entity_vector_length` 64. | -| `get_candidates` | `Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]` | Function that generates plausible candidates for a given `Span` object. | Built-in dictionary-lookup function. 
| +| Setting | Type | Description | Default | +| ---------------- | -------------------------------------------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------ | +| `labels_discard` | `Iterable[str]` | NER labels that will automatically get a "NIL" prediction. | `[]` | +| `incl_prior` | bool | Whether or not to include prior probabilities from the KB in the model. | `True` | +| `incl_context` | bool | Whether or not to include the local context in the model. | `True` | +| `model` | [`Model`](https://thinc.ai/docs/api-model) | The model to use. | [EntityLinker](/api/architectures#EntityLinker) | +| `kb_loader` | `Callable[[Vocab], KnowledgeBase]` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. | An empty KnowledgeBase with `entity_vector_length` 64. | +| `get_candidates` | `Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]` | Function that generates plausible candidates for a given `Span` object. | Built-in dictionary-lookup function. | ```python https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py @@ -55,7 +55,11 @@ https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py > entity_linker = nlp.add_pipe("entity_linker") > > # Construction via add_pipe with custom model -> config = {"model": {"@architectures": "my_el"}} +> config = {"model": {"@architectures": "my_el.v1"}} +> entity_linker = nlp.add_pipe("entity_linker", config=config) +> +> # Construction via add_pipe with custom KB and candidate generation +> config = {"kb_loader": {"@assets": "my_kb.v1"}, "get_candidates": {"@assets": "my_candidates.v1"},} > entity_linker = nlp.add_pipe("entity_linker", config=config) > > # Construction from class @@ -67,18 +71,20 @@ Create a new pipeline instance. 
In your application, you would normally use a shortcut for this and instantiate the component using its string name and [`nlp.add_pipe`](/api/language#add_pipe). - +Note that both the internal KB and the candidate generator can be +customized by providing custom registered functions. -| Name | Type | Description | -| ---------------- | --------------- | ------------------------------------------------------------------------------------------- | -| `vocab` | `Vocab` | The shared vocabulary. | -| `model` | `Model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. | -| `name` | str | String name of the component instance. Used to add entries to the `losses` during training. | -| _keyword-only_ | | | -| `kb` | `KnowlegeBase` | The [`KnowledgeBase`](/api/kb) holding all entities and their aliases. | -| `labels_discard` | `Iterable[str]` | NER labels that will automatically get a "NIL" prediction. | -| `incl_prior` | bool | Whether or not to include prior probabilities from the KB in the model. | -| `incl_context` | bool | Whether or not to include the local context in the model. | +| Name | Type | Description | +| ---------------- | -------------------------------------------------------- | ------------------------------------------------------------------------------------------- | +| `vocab` | `Vocab` | The shared vocabulary. | +| `model` | `Model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. | +| `name` | str | String name of the component instance. Used to add entries to the `losses` during training. | +| _keyword-only_ | | | +| `kb_loader` | `Callable[[Vocab], KnowledgeBase]` | Function that creates a [`KnowledgeBase`](/api/kb) from a `Vocab` instance. | +| `get_candidates` | `Callable[[KnowledgeBase, "Span"], Iterable[Candidate]]` | Function that generates plausible candidates for a given `Span` object. 
| +| `labels_discard` | `Iterable[str]` | NER labels that will automatically get a "NIL" prediction. | +| `incl_prior` | bool | Whether or not to include prior probabilities from the KB in the model. | +| `incl_context` | bool | Whether or not to include the local context in the model. | ## EntityLinker.\_\_call\_\_ {#call tag="method"}