mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-04 20:03:13 +03:00
Fix overwriting of lexical attributes when loading vectors during training
This commit is contained in:
parent
c0caf7cf27
commit
262d0a3148
|
@ -7,6 +7,7 @@ import tqdm
|
||||||
from thinc.neural._classes.model import Model
|
from thinc.neural._classes.model import Model
|
||||||
from timeit import default_timer as timer
|
from timeit import default_timer as timer
|
||||||
|
|
||||||
|
from ..attrs import PROB, IS_OOV, CLUSTER, LANG
|
||||||
from ..gold import GoldCorpus, minibatch
|
from ..gold import GoldCorpus, minibatch
|
||||||
from ..util import prints
|
from ..util import prints
|
||||||
from .. import util
|
from .. import util
|
||||||
|
@ -90,6 +91,15 @@ def train(lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
||||||
nlp.meta.update(meta)
|
nlp.meta.update(meta)
|
||||||
if vectors:
|
if vectors:
|
||||||
util.load_model(vectors, vocab=nlp.vocab)
|
util.load_model(vectors, vocab=nlp.vocab)
|
||||||
|
for lex in nlp.vocab:
|
||||||
|
values = {}
|
||||||
|
for attr, func in nlp.vocab.lex_attr_getters.items():
|
||||||
|
# These attrs are expected to be set by data. Others should
|
||||||
|
# be set by calling the language functions.
|
||||||
|
if attr not in (CLUSTER, PROB, IS_OOV, LANG):
|
||||||
|
values[lex.vocab.strings[attr]] = func(lex.orth_)
|
||||||
|
lex.set_attrs(**values)
|
||||||
|
lex.is_oov = False
|
||||||
for name in pipeline:
|
for name in pipeline:
|
||||||
nlp.add_pipe(nlp.create_pipe(name), name=name)
|
nlp.add_pipe(nlp.create_pipe(name), name=name)
|
||||||
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
|
optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user