mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #5977 from explosion/refactor/vector-names
This commit is contained in:
		
						commit
						cdc114e212
					
				| 
						 | 
				
			
			@ -273,10 +273,6 @@ class Errors:
 | 
			
		|||
            "existing extension, set `force=True` on `{obj}.set_extension`.")
 | 
			
		||||
    E091 = ("Invalid extension attribute {name}: expected callable or None, "
 | 
			
		||||
            "but got: {value}")
 | 
			
		||||
    E092 = ("Could not find or assign name for word vectors. Ususally, the "
 | 
			
		||||
            "name is read from the model's meta.json in vector.name. "
 | 
			
		||||
            "Alternatively, it is built from the 'lang' and 'name' keys in "
 | 
			
		||||
            "the meta.json. Vector names are required to avoid issue #1660.")
 | 
			
		||||
    E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
 | 
			
		||||
    E094 = ("Error reading line {line_num} in vectors file {loc}.")
 | 
			
		||||
    E095 = ("Can't write to frozen dictionary. This is likely an internal "
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1538,7 +1538,6 @@ class Language:
 | 
			
		|||
        def deserialize_vocab(path: Path) -> None:
 | 
			
		||||
            if path.exists():
 | 
			
		||||
                self.vocab.from_disk(path)
 | 
			
		||||
            _fix_pretrained_vectors_name(self)
 | 
			
		||||
 | 
			
		||||
        path = util.ensure_path(path)
 | 
			
		||||
        deserializers = {}
 | 
			
		||||
| 
						 | 
				
			
			@ -1605,14 +1604,10 @@ class Language:
 | 
			
		|||
            # from self.vocab.vectors, so set the name directly
 | 
			
		||||
            self.vocab.vectors.name = data.get("vectors", {}).get("name")
 | 
			
		||||
 | 
			
		||||
        def deserialize_vocab(b):
 | 
			
		||||
            self.vocab.from_bytes(b)
 | 
			
		||||
            _fix_pretrained_vectors_name(self)
 | 
			
		||||
 | 
			
		||||
        deserializers = {}
 | 
			
		||||
        deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
 | 
			
		||||
        deserializers["meta.json"] = deserialize_meta
 | 
			
		||||
        deserializers["vocab"] = deserialize_vocab
 | 
			
		||||
        deserializers["vocab"] = self.vocab.from_bytes
 | 
			
		||||
        deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
 | 
			
		||||
            b, exclude=["vocab"]
 | 
			
		||||
        )
 | 
			
		||||
| 
						 | 
				
			
			@ -1646,25 +1641,6 @@ class FactoryMeta:
 | 
			
		|||
    default_score_weights: Optional[Dict[str, float]] = None  # noqa: E704
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _fix_pretrained_vectors_name(nlp: Language) -> None:
 | 
			
		||||
    # TODO: Replace this once we handle vectors consistently as static
 | 
			
		||||
    # data
 | 
			
		||||
    if "vectors" in nlp.meta and "name" in nlp.meta["vectors"]:
 | 
			
		||||
        nlp.vocab.vectors.name = nlp.meta["vectors"]["name"]
 | 
			
		||||
    elif not nlp.vocab.vectors.size:
 | 
			
		||||
        nlp.vocab.vectors.name = None
 | 
			
		||||
    elif "name" in nlp.meta and "lang" in nlp.meta:
 | 
			
		||||
        vectors_name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
 | 
			
		||||
        nlp.vocab.vectors.name = vectors_name
 | 
			
		||||
    else:
 | 
			
		||||
        raise ValueError(Errors.E092)
 | 
			
		||||
    for name, proc in nlp.pipeline:
 | 
			
		||||
        if not hasattr(proc, "cfg") or not isinstance(proc.cfg, dict):
 | 
			
		||||
            continue
 | 
			
		||||
        proc.cfg.setdefault("deprecation_fixes", {})
 | 
			
		||||
        proc.cfg["deprecation_fixes"]["vectors_name"] = nlp.vocab.vectors.name
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DisabledPipes(list):
 | 
			
		||||
    """Manager for temporary pipeline disabling."""
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user