add types to Tok2Vec embedding layers

svlandeg 2020-10-01 09:20:09 +02:00
parent 64d90039a1
commit 5121972930


@@ -93,7 +93,7 @@ def build_Tok2Vec_model(
 @registry.architectures.register("spacy.MultiHashEmbed.v1")
 def MultiHashEmbed(
     width: int, rows: int, also_embed_subwords: bool, also_use_static_vectors: bool
-):
+) -> Model[List[Doc], List[Floats2d]]:
     """Construct an embedding layer that separately embeds a number of lexical
     attributes using hash embedding, concatenates the results, and passes it
     through a feed-forward subnetwork to build a mixed representations.
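
The new annotation describes a thinc Model that maps a batch of Doc objects to one float array per doc, with shape (n_tokens, width). A minimal usage sketch, assuming the layer is importable from spacy.ml.models.tok2vec as in the v3 tree this commit touches; the argument values are illustrative:

    from typing import List

    import spacy
    from thinc.api import Model
    from thinc.types import Floats2d
    from spacy.tokens import Doc
    from spacy.ml.models.tok2vec import MultiHashEmbed  # assumed import path

    # Build the embedding layer; arguments mirror the signature in the hunk above.
    embed: Model[List[Doc], List[Floats2d]] = MultiHashEmbed(
        width=96, rows=2000, also_embed_subwords=True, also_use_static_vectors=False
    )

    nlp = spacy.blank("en")
    docs = [nlp("type annotations help"), nlp("with model composition")]
    embed.initialize(X=docs)      # infer remaining dimensions from sample data
    arrays = embed.predict(docs)  # one (n_tokens, width) Floats2d per Doc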
@@ -166,7 +166,7 @@ def MultiHashEmbed(
 @registry.architectures.register("spacy.CharacterEmbed.v1")
 def CharacterEmbed(
     width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool
-):
+) -> Model[List[Doc], List[Floats2d]]:
     """Construct an embedded representation based on character embeddings, using
     a feed-forward network. A fixed number of UTF-8 byte characters are used for
     each word, taken from the beginning and end of the word equally. Padding is
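
Since both constructors now share the same return type, thinc's generics can check that downstream layers compose with either embedding variant. A hedged sketch (the helper build_with_encoder and the Maxout encoder are illustrative, not part of this commit):

    from typing import List

    from thinc.api import Maxout, Model, chain, with_array
    from thinc.types import Floats2d
    from spacy.tokens import Doc

    def build_with_encoder(
        embed: Model[List[Doc], List[Floats2d]], width: int
    ) -> Model[List[Doc], List[Floats2d]]:
        # embed is annotated as doc-list in, array-list out, so a type checker
        # can confirm that with_array(Maxout(...)), an array-to-array layer
        # lifted to operate on lists, is a valid continuation of the pipeline.
        return chain(embed, with_array(Maxout(nO=width)))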