mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Exclude strings from v3.2+ source vector checks (#9697)
Exclude strings from `Vector.to_bytes()` comparions for v3.2+ `Vectors` that now include the string store so that the source vector comparison is only comparing the vectors and not the strings.
This commit is contained in:
parent
f3981bd0c8
commit
ea450d652c
|
@ -701,7 +701,8 @@ class Language:
|
|||
if (
|
||||
self.vocab.vectors.shape != source.vocab.vectors.shape
|
||||
or self.vocab.vectors.key2row != source.vocab.vectors.key2row
|
||||
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
|
||||
or self.vocab.vectors.to_bytes(exclude=["strings"])
|
||||
!= source.vocab.vectors.to_bytes(exclude=["strings"])
|
||||
):
|
||||
warnings.warn(Warnings.W113.format(name=source_name))
|
||||
if source_name not in source.component_names:
|
||||
|
@ -1822,7 +1823,9 @@ class Language:
|
|||
)
|
||||
if model not in source_nlp_vectors_hashes:
|
||||
source_nlp_vectors_hashes[model] = hash(
|
||||
source_nlps[model].vocab.vectors.to_bytes()
|
||||
source_nlps[model].vocab.vectors.to_bytes(
|
||||
exclude=["strings"]
|
||||
)
|
||||
)
|
||||
if "_sourced_vectors_hashes" not in nlp.meta:
|
||||
nlp.meta["_sourced_vectors_hashes"] = {}
|
||||
|
|
|
@ -132,7 +132,7 @@ def init_vocab(
|
|||
logger.info(f"Added vectors: {vectors}")
|
||||
# warn if source model vectors are not identical
|
||||
sourced_vectors_hashes = nlp.meta.pop("_sourced_vectors_hashes", {})
|
||||
vectors_hash = hash(nlp.vocab.vectors.to_bytes())
|
||||
vectors_hash = hash(nlp.vocab.vectors.to_bytes(exclude=["strings"]))
|
||||
for sourced_component, sourced_vectors_hash in sourced_vectors_hashes.items():
|
||||
if vectors_hash != sourced_vectors_hash:
|
||||
warnings.warn(Warnings.W113.format(name=sourced_component))
|
||||
|
|
Loading…
Reference in New Issue
Block a user