mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* setting KB in the EL constructor, similar to how the model is passed on * removing wikipedia example files - moved to projects * throw an error when nlp.update is called with 2 positional arguments * rewriting the config logic in create pipe to accommodate other objects (e.g. KB) in the config * update config files with new parameters * avoid training pipeline components that don't have a model (like sentencizer) * various small fixes + UX improvements * small fixes * set thinc to 8.0.0a9 everywhere * remove outdated comment
		
			
				
	
	
		
			34 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			34 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from pathlib import Path
 | 
						|
 | 
						|
from thinc.api import chain, clone, list2ragged, reduce_mean, residual
 | 
						|
from thinc.api import Model, Maxout, Linear
 | 
						|
 | 
						|
from ...util import registry
 | 
						|
from ...kb import KnowledgeBase
 | 
						|
from ...vocab import Vocab
 | 
						|
 | 
						|
 | 
						|
@registry.architectures.register("spacy.EntityLinker.v1")
def build_nel_encoder(tok2vec, nO=None):
    """Assemble the model registered as "spacy.EntityLinker.v1".

    The pipeline is composed, in order, of: ``tok2vec``, ``list2ragged()``,
    ``reduce_mean()``, a ``residual``-wrapped ``Maxout`` layer, and a final
    ``Linear`` layer.

    tok2vec: The embedding model placed first in the chain. Its "nO"
        dimension is read and used as the width of the Maxout layer and as
        the input width of the final Linear layer.
    nO: Output width handed to the final Linear layer; forwarded unchanged,
        so the default of None is passed through as-is.
    RETURNS: The composed model, with the references "output_layer" and
        "tok2vec" set on it.
    """
    with Model.define_operators({">>": chain, "**": clone}):
        # Inside this context `>>` is bound to `chain` (`**` is bound to
        # `clone` as well, although it is not used below).
        token_width = tok2vec.get_dim("nO")
        output_layer = Linear(nO=nO, nI=token_width)

        # Grow the chain one stage at a time, left to right.
        model = tok2vec >> list2ragged()
        model = model >> reduce_mean()
        model = model >> residual(
            Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)
        )
        model = model >> output_layer

        # Named references so the sub-layers can be looked up on the model.
        model.set_ref("output_layer", output_layer)
        model.set_ref("tok2vec", tok2vec)
    return model
 | 
						|
 | 
						|
 | 
						|
@registry.assets.register("spacy.KBFromFile.v1")
def load_kb(nlp_path, kb_path) -> KnowledgeBase:
    """Create a KnowledgeBase from files on disk ("spacy.KBFromFile.v1").

    nlp_path: Directory that contains a "vocab" entry; the vocabulary is
        loaded from ``Path(nlp_path) / "vocab"``. Presumably this is a saved
        pipeline directory - confirm against callers.
    kb_path: Location handed to ``KnowledgeBase.load_bulk``.
    RETURNS (KnowledgeBase): The knowledge base built on the loaded vocab,
        after ``load_bulk`` has been called on it.
    """
    vocab_dir = Path(nlp_path) / "vocab"
    loaded_vocab = Vocab().from_disk(vocab_dir)

    knowledge_base = KnowledgeBase(vocab=loaded_vocab)
    knowledge_base.load_bulk(kb_path)
    return knowledge_base
 |