mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-01 02:13:07 +03:00
Update KB serialization tests. Update docs.
This commit is contained in:
parent
b6d9f4c5ab
commit
cc137073f6
|
@ -86,9 +86,6 @@ def test_serialize_subclassed_kb():
|
||||||
[nlp]
|
[nlp]
|
||||||
lang = "en"
|
lang = "en"
|
||||||
pipeline = ["entity_linker"]
|
pipeline = ["entity_linker"]
|
||||||
|
|
||||||
[default_values]
|
|
||||||
custom_field = 666
|
|
||||||
|
|
||||||
[components]
|
[components]
|
||||||
|
|
||||||
|
@ -96,8 +93,7 @@ def test_serialize_subclassed_kb():
|
||||||
factory = "entity_linker"
|
factory = "entity_linker"
|
||||||
|
|
||||||
[components.entity_linker.generate_empty_kb]
|
[components.entity_linker.generate_empty_kb]
|
||||||
@misc = "spacy.CustomEmptyKB.v1"
|
@misc = "kb_test.CustomEmptyKB.v1"
|
||||||
custom_field = ${default_values.custom_field}
|
|
||||||
|
|
||||||
[initialize]
|
[initialize]
|
||||||
|
|
||||||
|
@ -106,9 +102,9 @@ def test_serialize_subclassed_kb():
|
||||||
[initialize.components.entity_linker]
|
[initialize.components.entity_linker]
|
||||||
|
|
||||||
[initialize.components.entity_linker.kb_loader]
|
[initialize.components.entity_linker.kb_loader]
|
||||||
@misc = "spacy.CustomKB.v1"
|
@misc = "kb_test.CustomKB.v1"
|
||||||
entity_vector_length = 342
|
entity_vector_length = 342
|
||||||
custom_field = ${default_values.custom_field}
|
custom_field = 666
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class SubInMemoryLookupKB(InMemoryLookupKB):
|
class SubInMemoryLookupKB(InMemoryLookupKB):
|
||||||
|
@ -116,20 +112,18 @@ def test_serialize_subclassed_kb():
|
||||||
super().__init__(vocab, entity_vector_length)
|
super().__init__(vocab, entity_vector_length)
|
||||||
self.custom_field = custom_field
|
self.custom_field = custom_field
|
||||||
|
|
||||||
@registry.misc("spacy.CustomEmptyKB.v1")
|
@registry.misc("kb_test.CustomEmptyKB.v1")
|
||||||
def empty_custom_kb(
|
def empty_custom_kb() -> Callable[[Vocab, int], SubInMemoryLookupKB]:
|
||||||
custom_field: int,
|
|
||||||
) -> Callable[[Vocab, int], SubInMemoryLookupKB]:
|
|
||||||
def empty_kb_factory(vocab: Vocab, entity_vector_length: int):
|
def empty_kb_factory(vocab: Vocab, entity_vector_length: int):
|
||||||
return SubInMemoryLookupKB(
|
return SubInMemoryLookupKB(
|
||||||
vocab=vocab,
|
vocab=vocab,
|
||||||
entity_vector_length=entity_vector_length,
|
entity_vector_length=entity_vector_length,
|
||||||
custom_field=custom_field,
|
custom_field=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
return empty_kb_factory
|
return empty_kb_factory
|
||||||
|
|
||||||
@registry.misc("spacy.CustomKB.v1")
|
@registry.misc("kb_test.CustomKB.v1")
|
||||||
def custom_kb(
|
def custom_kb(
|
||||||
entity_vector_length: int, custom_field: int
|
entity_vector_length: int, custom_field: int
|
||||||
) -> Callable[[Vocab], InMemoryLookupKB]:
|
) -> Callable[[Vocab], InMemoryLookupKB]:
|
||||||
|
|
|
@ -899,15 +899,21 @@ The `EntityLinker` model architecture is a Thinc `Model` with a
|
||||||
| `nO` | Output dimension, determined by the length of the vectors encoding each entity in the KB. If the `nO` dimension is not set, the entity linking component will set it when `initialize` is called. ~~Optional[int]~~ |
|
| `nO` | Output dimension, determined by the length of the vectors encoding each entity in the KB. If the `nO` dimension is not set, the entity linking component will set it when `initialize` is called. ~~Optional[int]~~ |
|
||||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
| **CREATES** | The model using the architecture. ~~Model[List[Doc], Floats2d]~~ |
|
||||||
|
|
||||||
### spacy.EmptyKB.v1 {id="EmptyKB"}
|
### spacy.EmptyKB.v1 {id="EmptyKB.v1"}
|
||||||
|
|
||||||
A function that creates an empty `KnowledgeBase` from a [`Vocab`](/api/vocab)
|
A function that creates an empty `KnowledgeBase` from a [`Vocab`](/api/vocab)
|
||||||
instance. This is the default when a new entity linker component is created.
|
instance.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------- | ----------------------------------------------------------------------------------- |
|
| ---------------------- | ----------------------------------------------------------------------------------- |
|
||||||
| `entity_vector_length` | The length of the vectors encoding each entity in the KB. Defaults to `64`. ~~int~~ |
|
| `entity_vector_length` | The length of the vectors encoding each entity in the KB. Defaults to `64`. ~~int~~ |
|
||||||
|
|
||||||
|
### spacy.EmptyKB.v2 {id="EmptyKB"}
|
||||||
|
|
||||||
|
A function that creates an empty `KnowledgeBase` from a [`Vocab`](/api/vocab)
|
||||||
|
instance. This is the default when a new entity linker component is created. It
|
||||||
|
returns a `Callable[[Vocab, int], InMemoryLookupKB]`.
|
||||||
|
|
||||||
### spacy.KBFromFile.v1 {id="KBFromFile"}
|
### spacy.KBFromFile.v1 {id="KBFromFile"}
|
||||||
|
|
||||||
A function that reads an existing `KnowledgeBase` from file.
|
A function that reads an existing `KnowledgeBase` from file.
|
||||||
|
|
|
@ -64,7 +64,7 @@ architectures and their arguments and hyperparameters.
|
||||||
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~ |
|
| `use_gold_ents` | Whether to copy entities from the gold docs or not. Defaults to `True`. If `False`, entities must be set in the training data or by an annotating component in the pipeline. ~~int~~ |
|
||||||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||||
| `get_candidates_batch` <Tag variant="new">3.5</Tag> | Function that generates plausible candidates for a given batch of `Span` objects. Defaults to [CandidateBatchGenerator](/api/architectures#CandidateBatchGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]]~~ |
|
| `get_candidates_batch` <Tag variant="new">3.5</Tag> | Function that generates plausible candidates for a given batch of `Span` objects. Defaults to [CandidateBatchGenerator](/api/architectures#CandidateBatchGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]]~~ |
|
||||||
| `generate_empty_kb` <Tag variant="new">3.6</Tag> | Function that generates an empty `KnowledgeBase` object. Defaults to generating an empty [`InMemoryLookupKB`](/api/inmemorylookupkb). ~~Callable[[Vocab, int], KnowledgeBase]~~ |
|
| `generate_empty_kb` <Tag variant="new">3.6</Tag> | Function that generates an empty `KnowledgeBase` object. Defaults to [`spacy.EmptyKB.v2`](/api/architectures#EmptyKB), which generates an empty [`InMemoryLookupKB`](/api/inmemorylookupkb). ~~Callable[[Vocab, int], KnowledgeBase]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether existing annotation is overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_links`](/api/scorer#score_links). ~~Optional[Callable]~~ |
|
||||||
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
| `threshold` <Tag variant="new">3.4</Tag> | Confidence threshold for entity predictions. The default of `None` implies that all predictions are accepted, otherwise those with a score beneath the treshold are discarded. If there are no predictions with scores above the threshold, the linked entity is `NIL`. ~~Optional[float]~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user