Add all strings from source models (#6736)

Add all strings from the source model when adding a pipe from a source
model.

Minor:

* Skip `disable=["vocab", "tokenizer"]` when loading a source model from
the config, since this doesn't do anything and is misleading.
This commit is contained in:
Adriane Boyd 2021-01-16 02:26:15 +01:00 committed by GitHub
parent 9328dd5625
commit c8b4370865
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -697,6 +697,8 @@ class Language:
source_config = source.config.interpolate()
pipe_config = util.copy_config(source_config["components"][source_name])
self._pipe_configs[name] = pipe_config
for s in source.vocab.strings:
self.vocab.strings.add(s)
return pipe, pipe_config["factory"]
def add_pipe(
@ -1619,9 +1621,7 @@ class Language:
if model not in source_nlps:
# We only need the components here and we need to init
# model with the same vocab as the current nlp object
source_nlps[model] = util.load_model(
model, vocab=nlp.vocab, disable=["vocab", "tokenizer"]
)
source_nlps[model] = util.load_model(model, vocab=nlp.vocab)
source_name = pipe_cfg.get("component", pipe_name)
nlp.add_pipe(source_name, source=source_nlps[model], name=pipe_name)
disabled_pipes = [*config["nlp"]["disabled"], *disable]