mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Also rename to include_static_vectors in CharEmbed
This commit is contained in:
		
							parent
							
								
									67652bcbb5
								
							
						
					
					
						commit
						39aabf50ab
					
				| 
						 | 
					@ -177,7 +177,7 @@ def CharacterEmbed(
 | 
				
			||||||
    rows: int,
 | 
					    rows: int,
 | 
				
			||||||
    nM: int,
 | 
					    nM: int,
 | 
				
			||||||
    nC: int,
 | 
					    nC: int,
 | 
				
			||||||
    also_use_static_vectors: bool,
 | 
					    include_static_vectors: bool,
 | 
				
			||||||
    feature: Union[int, str] = "LOWER",
 | 
					    feature: Union[int, str] = "LOWER",
 | 
				
			||||||
) -> Model[List[Doc], List[Floats2d]]:
 | 
					) -> Model[List[Doc], List[Floats2d]]:
 | 
				
			||||||
    """Construct an embedded representation based on character embeddings, using
 | 
					    """Construct an embedded representation based on character embeddings, using
 | 
				
			||||||
| 
						 | 
					@ -204,13 +204,13 @@ def CharacterEmbed(
 | 
				
			||||||
    nC (int): The number of UTF-8 bytes to embed per word. Recommended values
 | 
					    nC (int): The number of UTF-8 bytes to embed per word. Recommended values
 | 
				
			||||||
        are between 3 and 8, although it may depend on the length of words in the
 | 
					        are between 3 and 8, although it may depend on the length of words in the
 | 
				
			||||||
        language.
 | 
					        language.
 | 
				
			||||||
    also_use_static_vectors (bool): Whether to also use static word vectors.
 | 
					    include_static_vectors (bool): Whether to also use static word vectors.
 | 
				
			||||||
        Requires a vectors table to be loaded in the Doc objects' vocab.
 | 
					        Requires a vectors table to be loaded in the Doc objects' vocab.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    feature = intify_attr(feature)
 | 
					    feature = intify_attr(feature)
 | 
				
			||||||
    if feature is None:
 | 
					    if feature is None:
 | 
				
			||||||
        raise ValueError(Errors.E911(feat=feature))
 | 
					        raise ValueError(Errors.E911(feat=feature))
 | 
				
			||||||
    if also_use_static_vectors:
 | 
					    if include_static_vectors:
 | 
				
			||||||
        model = chain(
 | 
					        model = chain(
 | 
				
			||||||
            concatenate(
 | 
					            concatenate(
 | 
				
			||||||
                chain(_character_embed.CharacterEmbed(nM=nM, nC=nC), list2ragged()),
 | 
					                chain(_character_embed.CharacterEmbed(nM=nM, nC=nC), list2ragged()),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,7 +32,7 @@ width = 128
 | 
				
			||||||
rows = 7000
 | 
					rows = 7000
 | 
				
			||||||
nM = 64
 | 
					nM = 64
 | 
				
			||||||
nC = 8
 | 
					nC = 8
 | 
				
			||||||
also_use_static_vectors = false
 | 
					include_static_vectors = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[model.tok2vec.encode]
 | 
					[model.tok2vec.encode]
 | 
				
			||||||
@architectures = "spacy.MaxoutWindowEncoder.v1"
 | 
					@architectures = "spacy.MaxoutWindowEncoder.v1"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,8 +63,8 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
 | 
				
			||||||
    [
 | 
					    [
 | 
				
			||||||
        (8, MultiHashEmbed, {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
 | 
					        (8, MultiHashEmbed, {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
 | 
				
			||||||
        (8, MultiHashEmbed, {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 6}),
 | 
					        (8, MultiHashEmbed, {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 6}),
 | 
				
			||||||
        (8, CharacterEmbed, {"rows": 100, "nM": 64, "nC": 8, "also_use_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 3}),
 | 
					        (8, CharacterEmbed, {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}, MaxoutWindowEncoder, {"window_size": 1, "maxout_pieces": 3, "depth": 3}),
 | 
				
			||||||
        (8, CharacterEmbed, {"rows": 100, "nM": 16, "nC": 2, "also_use_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 3}),
 | 
					        (8, CharacterEmbed, {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}, MishWindowEncoder, {"window_size": 1, "depth": 3}),
 | 
				
			||||||
    ],
 | 
					    ],
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
# fmt: on
 | 
					# fmt: on
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user