Clean up link_vectors_to_models unused stuff

Matthew Honnibal 2020-07-28 22:17:47 +02:00
parent 0c17ea4c85
commit 1784c95827
15 changed files with 6 additions and 51 deletions

View File

@@ -11,7 +11,6 @@ from ...util import ensure_path, working_dir
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
# TODO: find a solution for caches
# CACHES = [
# Path.home() / ".torch",

View File

@@ -21,7 +21,7 @@ from .vocab import Vocab, create_vocab
from .pipe_analysis import analyze_pipes, analyze_all_pipes, validate_attrs
from .gold import Example
from .scorer import Scorer
-from .util import link_vectors_to_models, create_default_optimizer, registry
+from .util import create_default_optimizer, registry
from .util import SimpleFrozenDict, combine_score_weights
from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
@@ -1049,7 +1049,6 @@ class Language:
if self.vocab.vectors.data.shape[1] >= 1:
ops = get_current_ops()
self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-link_vectors_to_models(self.vocab)
if sgd is None:
sgd = create_default_optimizer()
self._optimizer = sgd
@@ -1082,7 +1081,6 @@
ops = get_current_ops()
if self.vocab.vectors.data.shape[1] >= 1:
self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-link_vectors_to_models(self.vocab)
if sgd is None:
sgd = create_default_optimizer()
self._optimizer = sgd
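Both places in Language that dropped the call now reduce to the same two steps: move the vectors table onto the active backend and fall back to a default optimizer if none was passed. A minimal standalone sketch of that flow follows (this is not spaCy's actual code; the Adam settings merely stand in for create_default_optimizer()):

import numpy
from thinc.api import Adam, get_current_ops


def begin_training_sketch(vectors_data, sgd=None):
    # Move the vectors table to the current backend (CPU or GPU) if it has any width.
    if vectors_data.shape[1] >= 1:
        ops = get_current_ops()
        vectors_data = ops.asarray(vectors_data)
    # Only build a default optimizer when the caller did not supply one.
    if sgd is None:
        sgd = Adam(0.001)  # illustrative stand-in for create_default_optimizer()
    return vectors_data, sgd


data, optimizer = begin_training_sketch(numpy.zeros((10, 300), dtype="f"))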

View File

@@ -149,7 +149,6 @@ class Morphologizer(Tagger):
self.cfg["labels_pos"][norm_label] = POS_IDS[pos]
self.set_output(len(self.labels))
self.model.initialize()
-util.link_vectors_to_models(self.vocab)
if sgd is None:
sgd = self.create_optimizer()
return sgd

View File

@@ -11,7 +11,6 @@ from .tagger import Tagger
from ..language import Language
from ..syntax import nonproj
from ..attrs import POS, ID
-from ..util import link_vectors_to_models
from ..errors import Errors
@@ -91,7 +90,6 @@ class MultitaskObjective(Tagger):
if label is not None and label not in self.labels:
self.labels[label] = len(self.labels)
self.model.initialize()
-link_vectors_to_models(self.vocab)
if sgd is None:
sgd = self.create_optimizer()
return sgd
@@ -179,7 +177,6 @@
pass
def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
-link_vectors_to_models(self.vocab)
self.model.initialize()
X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
self.model.output_layer.begin_training(X)

View File

@@ -3,7 +3,7 @@ import srsly
from ..tokens.doc cimport Doc
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
from ..errors import Errors
from .. import util
@@ -145,8 +145,6 @@
DOCS: https://spacy.io/api/pipe#begin_training
"""
self.model.initialize()
-if hasattr(self, "vocab"):
-    link_vectors_to_models(self.vocab)
if sgd is None:
sgd = self.create_optimizer()
return sgd

View File

@@ -138,7 +138,6 @@
"""
self.set_output(len(self.labels))
self.model.initialize()
-util.link_vectors_to_models(self.vocab)
if sgd is None:
sgd = self.create_optimizer()
return sgd

View File

@@ -168,7 +168,6 @@
self.model.initialize()
if pipeline is not None:
self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-util.link_vectors_to_models(self.vocab)
self.loss_func = SequenceCategoricalCrossentropy(
names=self.get_tag_names(), normalize=True, missing_value=None
)

View File

@@ -318,7 +318,6 @@
self.model.initialize(X=doc_sample)
# Get batch of example docs, example outputs to call begin_training().
# This lets the model infer shapes.
-util.link_vectors_to_models(self.vocab)
if sgd is None:
sgd = self.create_optimizer()
return sgd
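The surviving comment ("This lets the model infer shapes") names the pattern that replaces the old hookup: each component hands a sample batch to Model.initialize() and lets thinc fill in any unset dimensions. A small self-contained thinc example of that shape inference (the layer and the sizes are made up for illustration):

import numpy
from thinc.api import Linear

# nO and nI are deliberately left unset at construction time.
model = Linear()
X = numpy.zeros((4, 300), dtype="f")
Y = numpy.zeros((4, 17), dtype="f")
# Passing sample data lets thinc infer the missing dimensions.
model.initialize(X=X, Y=Y)
assert model.get_dim("nI") == 300
assert model.get_dim("nO") == 17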

View File

@@ -356,7 +356,6 @@
docs = [Doc(Vocab(), words=["hello"])]
truths, _ = self._examples_to_truth(examples)
self.set_output(len(self.labels))
-util.link_vectors_to_models(self.vocab)
self.model.initialize(X=docs, Y=truths)
if sgd is None:
sgd = self.create_optimizer()

View File

@@ -7,7 +7,7 @@ from ..tokens import Doc
from ..vocab import Vocab
from ..language import Language
from ..errors import Errors
-from ..util import link_vectors_to_models, minibatch
+from ..util import minibatch
default_model_config = """
@@ -198,7 +198,6 @@ class Tok2Vec(Pipe):
"""
docs = [Doc(self.vocab, words=["hello"])]
self.model.initialize(X=docs)
-link_vectors_to_models(self.vocab)
class Tok2VecListener(Model):

View File

@@ -21,7 +21,7 @@ from .transition_system cimport Transition
from ..compat import copy_array
from ..errors import Errors, TempErrors
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
from .. import util
from . import nonproj

View File

@@ -29,7 +29,7 @@ from .stateclass cimport StateClass
from ._state cimport StateC
from .transition_system cimport Transition
-from ..util import link_vectors_to_models, create_default_optimizer, registry
+from ..util import create_default_optimizer, registry
from ..compat import copy_array
from ..errors import Errors, Warnings
from .. import util
@@ -456,7 +456,6 @@ cdef class Parser:
self.model.initialize()
if pipeline is not None:
self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-link_vectors_to_models(self.vocab)
return sgd
def to_disk(self, path, exclude=tuple()):

View File

@@ -9,7 +9,6 @@ from spacy.matcher import Matcher
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab
from spacy.compat import pickle
-from spacy.util import link_vectors_to_models
import numpy
import random
@@ -190,7 +189,6 @@ def test_issue2871():
_ = vocab[word] # noqa: F841
vocab.set_vector(word, vector_data[0])
vocab.vectors.name = "dummy_vectors"
-link_vectors_to_models(vocab)
assert vocab["dog"].rank == 0
assert vocab["cat"].rank == 1
assert vocab["SUFFIX"].rank == 2

View File

@@ -1200,29 +1200,6 @@ class DummyTokenizer:
return self
-def link_vectors_to_models(
-    vocab: "Vocab",
-    models: List[Model] = [],
-    *,
-    vectors_name_attr="vectors_name",
-    vectors_attr="vectors",
-    key2row_attr="key2row",
-    default_vectors_name="spacy_pretrained_vectors",
-) -> None:
-    """Supply vectors data to models."""
-    vectors = vocab.vectors
-    if vectors.name is None:
-        vectors.name = default_vectors_name
-        if vectors.data.size != 0:
-            warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
-    for model in models:
-        for node in model.walk():
-            if node.attrs.get(vectors_name_attr) == vectors.name:
-                node.attrs[vectors_attr] = Unserializable(vectors.data)
-                node.attrs[key2row_attr] = Unserializable(vectors.key2row)
def create_default_optimizer() -> Optimizer:
# TODO: Do we still want to allow env_opt?
learn_rate = env_opt("learn_rate", 0.001)
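For reference, the deleted helper walked the thinc model graph and attached the shared vectors table (wrapped as Unserializable) to every node whose vectors_name attr matched. Since the components above now initialize from sample data, it had no remaining callers and could go. A standalone sketch of the graph-walking mechanism it relied on (the attr names, layers, and shapes are made up, and the Unserializable wrapper is omitted):

import numpy
from thinc.api import Linear, chain

vectors_data = numpy.zeros((1000, 300), dtype="f")
model = chain(Linear(128, 300), Linear(10, 128))
# Pretend one layer declared an interest in a named vectors table.
model.layers[0].attrs["vectors_name"] = "my_vectors"

# Walk every node in the graph and stash the table on the nodes that asked for it.
for node in model.walk():
    if node.attrs.get("vectors_name") == "my_vectors":
        node.attrs["vectors"] = vectors_data
        node.attrs["key2row"] = {}  # stand-in for the real key-to-row mapping

assert "vectors" in model.layers[0].attrs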

View File

@@ -16,7 +16,7 @@ from .errors import Errors
from .lemmatizer import Lemmatizer
from .attrs import intify_attrs, NORM, IS_STOP
from .vectors import Vectors
-from .util import link_vectors_to_models, registry
+from .util import registry
from .lookups import Lookups, load_lookups
from . import util
from .lang.norm_exceptions import BASE_NORMS
@@ -344,7 +344,6 @@
synonym = self.strings[syn_keys[i][0]]
score = scores[i][0]
remap[word] = (synonym, score)
-link_vectors_to_models(self)
return remap
def get_vector(self, orth, minn=None, maxn=None):
@@ -476,8 +475,6 @@
if "vectors" not in exclude:
if self.vectors is not None:
self.vectors.from_disk(path, exclude=["strings"])
-if self.vectors.name is not None:
-    link_vectors_to_models(self)
if "lookups" not in exclude:
self.lookups.from_disk(path)
if "lexeme_norm" in self.lookups:
@@ -537,8 +534,6 @@
)
self.length = 0
self._by_orth = PreshMap()
-if self.vectors.name is not None:
-    link_vectors_to_models(self)
return self
def _reset_cache(self, keys, strings):
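Deserialization gets the same treatment: loading no longer re-links anything, so restoring a Vocab that carries vectors is a plain round trip. A minimal sketch, assuming spaCy is installed (the vector values and the temporary path are illustrative):

import tempfile
from pathlib import Path

import numpy
from spacy.vocab import Vocab

vocab = Vocab()
vocab.set_vector("dog", numpy.asarray([1.0, 0.0], dtype="f"))

with tempfile.TemporaryDirectory() as tmp_dir:
    path = Path(tmp_dir) / "vocab"
    vocab.to_disk(path)
    # Nothing has to be hooked up after loading; the vectors are simply there.
    loaded = Vocab().from_disk(path)
    assert loaded.vectors.shape == vocab.vectors.shape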