spaCy/spacy/ml/models/entity_linker.py
Sofie Van Landeghem 7f5715a081
Various fixes to NEL functionality, Example class etc (#5460)
* setting KB in the EL constructor, similar to how the model is passed on

* removing wikipedia example files - moved to projects

* throw an error when nlp.update is called with 2 positional arguments

* rewriting the config logic in create pipe to accommodate other objects (e.g. KB) in the config

* update config files with new parameters

* avoid training pipeline components that don't have a model (like sentencizer)

* various small fixes + UX improvements

* small fixes

* set thinc to 8.0.0a9 everywhere

* remove outdated comment
2020-05-20 11:41:12 +02:00

34 lines
1.0 KiB
Python

from pathlib import Path
from thinc.api import chain, clone, list2ragged, reduce_mean, residual
from thinc.api import Model, Maxout, Linear
from ...util import registry
from ...kb import KnowledgeBase
from ...vocab import Vocab
@registry.architectures.register("spacy.EntityLinker.v1")
def build_nel_encoder(tok2vec, nO=None):
    """Assemble the entity-linker encoder on top of a tok2vec layer.

    The pipeline is: tok2vec >> list2ragged >> reduce_mean >> residual Maxout
    >> Linear output layer.

    tok2vec: Layer whose "nO" dimension is read to size the hidden layers.
    nO: Output width passed to the final Linear layer (defaults to None).
    RETURNS: The composed model, with the refs "output_layer" and "tok2vec"
        set so callers can look those sub-layers up by name.
    """
    operators = {">>": chain, "**": clone}
    with Model.define_operators(operators):
        width = tok2vec.get_dim("nO")
        projection = Linear(nO=nO, nI=width)
        # Compose strictly left to right so the nesting of the chained
        # layers is the same as in a single `a >> b >> c >> d >> e` expression.
        pooled = tok2vec >> list2ragged() >> reduce_mean()
        hidden = residual(Maxout(nO=width, nI=width, nP=2, dropout=0.0))
        encoder = pooled >> hidden >> projection
        encoder.set_ref("output_layer", projection)
        encoder.set_ref("tok2vec", tok2vec)
    return encoder
@registry.assets.register("spacy.KBFromFile.v1")
def load_kb(nlp_path, kb_path) -> KnowledgeBase:
    """Build a KnowledgeBase from files on disk.

    nlp_path: Directory containing a "vocab" subdirectory to load the Vocab from.
    kb_path: Location handed to `KnowledgeBase.load_bulk`.
    RETURNS: The KnowledgeBase created with the loaded vocab, after the bulk load.
    """
    fresh_vocab = Vocab()
    vocab_dir = Path(nlp_path) / "vocab"
    # Use the object returned by from_disk as the vocab for the KB.
    loaded_vocab = fresh_vocab.from_disk(vocab_dir)
    knowledge_base = KnowledgeBase(vocab=loaded_vocab)
    knowledge_base.load_bulk(kb_path)
    return knowledge_base