Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-25 01:16:28 +03:00)
Skip duplicate lexeme rank setting (#5401)
Skip duplicate lexeme rank setting within `_fix_pretrained_vectors_name()`.
This commit is contained in:
parent f49e2810e6
commit 908dea3939
13
spacy/_ml.py
13
spacy/_ml.py
@@ -279,18 +279,19 @@ class PrecomputableAffine(Model):
 break
 
 
-def link_vectors_to_models(vocab):
+def link_vectors_to_models(vocab, skip_rank=False):
 vectors = vocab.vectors
 if vectors.name is None:
 vectors.name = VECTORS_KEY
 if vectors.data.size != 0:
 warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
 ops = Model.ops
-for word in vocab:
-if word.orth in vectors.key2row:
-word.rank = vectors.key2row[word.orth]
-else:
-word.rank = util.OOV_RANK
+if not skip_rank:
+for word in vocab:
+if word.orth in vectors.key2row:
+word.rank = vectors.key2row[word.orth]
+else:
+word.rank = util.OOV_RANK
 data = ops.asarray(vectors.data)
 # Set an entry here, so that vectors are accessed by StaticVectors
 # (unideal, I know)
@@ -1072,7 +1072,7 @@ def _fix_pretrained_vectors_name(nlp):
 else:
 raise ValueError(Errors.E092)
 if nlp.vocab.vectors.size != 0:
-link_vectors_to_models(nlp.vocab)
+link_vectors_to_models(nlp.vocab, skip_rank=True)
 for name, proc in nlp.pipeline:
 if not hasattr(proc, "cfg"):
 continue
Loading…
Reference in New Issue
Block a user