diff --git a/spacy/language.py b/spacy/language.py index aa57989ac..204b24ecb 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -701,7 +701,8 @@ class Language: if ( self.vocab.vectors.shape != source.vocab.vectors.shape or self.vocab.vectors.key2row != source.vocab.vectors.key2row - or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes() + or self.vocab.vectors.to_bytes(exclude=["strings"]) + != source.vocab.vectors.to_bytes(exclude=["strings"]) ): warnings.warn(Warnings.W113.format(name=source_name)) if source_name not in source.component_names: @@ -1822,7 +1823,9 @@ class Language: ) if model not in source_nlp_vectors_hashes: source_nlp_vectors_hashes[model] = hash( - source_nlps[model].vocab.vectors.to_bytes() + source_nlps[model].vocab.vectors.to_bytes( + exclude=["strings"] + ) ) if "_sourced_vectors_hashes" not in nlp.meta: nlp.meta["_sourced_vectors_hashes"] = {} diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index 13ccfeb93..084204389 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -132,7 +132,7 @@ def init_vocab( logger.info(f"Added vectors: {vectors}") # warn if source model vectors are not identical sourced_vectors_hashes = nlp.meta.pop("_sourced_vectors_hashes", {}) - vectors_hash = hash(nlp.vocab.vectors.to_bytes()) + vectors_hash = hash(nlp.vocab.vectors.to_bytes(exclude=["strings"])) for sourced_component, sourced_vectors_hash in sourced_vectors_hashes.items(): if vectors_hash != sourced_vectors_hash: warnings.warn(Warnings.W113.format(name=sourced_component))