spaCy/spacy/ml/models/entity_linker.py
Sofie Van Landeghem 82347110f5
Default empty KB in EL component (#5872)
* EL field documentation

* documentation consistent with docs

* default empty KB, initialize vocab separately

* formatting

* add test for changing the default entity vector length

* update comment
2020-08-04 14:34:09 +02:00

40 lines
1.3 KiB
Python

from typing import Optional
from thinc.api import chain, clone, list2ragged, reduce_mean, residual
from thinc.api import Model, Maxout, Linear
from ...util import registry
from ...kb import KnowledgeBase
from ...vocab import Vocab
@registry.architectures.register("spacy.EntityLinker.v1")
def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
with Model.define_operators({">>": chain, "**": clone}):
token_width = tok2vec.get_dim("nO")
output_layer = Linear(nO=nO, nI=token_width)
model = (
tok2vec
>> list2ragged()
>> reduce_mean()
>> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))
>> output_layer
)
model.set_ref("output_layer", output_layer)
model.set_ref("tok2vec", tok2vec)
return model
@registry.assets.register("spacy.KBFromFile.v1")
def load_kb(vocab_path: str, kb_path: str) -> KnowledgeBase:
vocab = Vocab().from_disk(vocab_path)
kb = KnowledgeBase(entity_vector_length=1)
kb.initialize(vocab)
kb.load_bulk(kb_path)
return kb
@registry.assets.register("spacy.EmptyKB.v1")
def empty_kb(entity_vector_length: int) -> KnowledgeBase:
kb = KnowledgeBase(entity_vector_length=entity_vector_length)
return kb