Skip duplicate lexeme rank setting (#5401)

Skip duplicate lexeme rank setting within
`_fix_pretrained_vectors_name()` by calling `link_vectors_to_models()`
with the new `skip_rank=True` option.
This commit is contained in:
adrianeboyd 2020-05-14 18:26:12 +02:00 committed by GitHub
parent f49e2810e6
commit 908dea3939
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 7 deletions

View File

@@ -279,18 +279,19 @@ class PrecomputableAffine(Model):
break
def link_vectors_to_models(vocab):
def link_vectors_to_models(vocab, skip_rank=False):
vectors = vocab.vectors
if vectors.name is None:
vectors.name = VECTORS_KEY
if vectors.data.size != 0:
warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
ops = Model.ops
for word in vocab:
if word.orth in vectors.key2row:
word.rank = vectors.key2row[word.orth]
else:
word.rank = util.OOV_RANK
if not skip_rank:
for word in vocab:
if word.orth in vectors.key2row:
word.rank = vectors.key2row[word.orth]
else:
word.rank = util.OOV_RANK
data = ops.asarray(vectors.data)
# Set an entry here, so that vectors are accessed by StaticVectors
# (unideal, I know)

View File

@@ -1072,7 +1072,7 @@ def _fix_pretrained_vectors_name(nlp):
else:
raise ValueError(Errors.E092)
if nlp.vocab.vectors.size != 0:
link_vectors_to_models(nlp.vocab)
link_vectors_to_models(nlp.vocab, skip_rank=True)
for name, proc in nlp.pipeline:
if not hasattr(proc, "cfg"):
continue