From 1784c95827f7a2fe8f8df88facced72af73cc961 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 28 Jul 2020 22:17:47 +0200 Subject: [PATCH] Clean up link_vectors_to_models unused stuff --- spacy/cli/project/assets.py | 1 - spacy/language.py | 4 +--- spacy/pipeline/morphologizer.pyx | 1 - spacy/pipeline/multitask.pyx | 3 --- spacy/pipeline/pipe.pyx | 4 +--- spacy/pipeline/senter.pyx | 1 - spacy/pipeline/simple_ner.py | 1 - spacy/pipeline/tagger.pyx | 1 - spacy/pipeline/textcat.py | 1 - spacy/pipeline/tok2vec.py | 3 +-- spacy/syntax/_parser_model.pyx | 2 +- spacy/syntax/nn_parser.pyx | 3 +-- spacy/tests/regression/test_issue2501-3000.py | 2 -- spacy/util.py | 23 ------------------- spacy/vocab.pyx | 7 +----- 15 files changed, 6 insertions(+), 51 deletions(-) diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 1bd28cb7e..e42935e2f 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -11,7 +11,6 @@ from ...util import ensure_path, working_dir from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum - # TODO: find a solution for caches # CACHES = [ # Path.home() / ".torch", diff --git a/spacy/language.py b/spacy/language.py index 3511a7691..4b7651d65 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -21,7 +21,7 @@ from .vocab import Vocab, create_vocab from .pipe_analysis import analyze_pipes, analyze_all_pipes, validate_attrs from .gold import Example from .scorer import Scorer -from .util import link_vectors_to_models, create_default_optimizer, registry +from .util import create_default_optimizer, registry from .util import SimpleFrozenDict, combine_score_weights from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES @@ -1049,7 +1049,6 @@ class Language: if self.vocab.vectors.data.shape[1] >= 1: ops = get_current_ops() self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data) - link_vectors_to_models(self.vocab) if sgd is None: sgd = create_default_optimizer() self._optimizer = sgd @@ -1082,7 +1081,6 @@ class Language: ops = get_current_ops() if self.vocab.vectors.data.shape[1] >= 1: self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data) - link_vectors_to_models(self.vocab) if sgd is None: sgd = create_default_optimizer() self._optimizer = sgd diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index a6be129ba..56ef44cb9 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -149,7 +149,6 @@ class Morphologizer(Tagger): self.cfg["labels_pos"][norm_label] = POS_IDS[pos] self.set_output(len(self.labels)) self.model.initialize() - util.link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx index 4945afe4f..97826aaa6 100644 --- a/spacy/pipeline/multitask.pyx +++ b/spacy/pipeline/multitask.pyx @@ -11,7 +11,6 @@ from .tagger import Tagger from ..language import Language from ..syntax import nonproj from ..attrs import POS, ID -from ..util import link_vectors_to_models from ..errors import Errors @@ -91,7 +90,6 @@ class MultitaskObjective(Tagger): if label is not None and label not in self.labels: self.labels[label] = len(self.labels) self.model.initialize() - link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() return sgd @@ -179,7 +177,6 @@ class ClozeMultitask(Pipe): pass def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None): - link_vectors_to_models(self.vocab) self.model.initialize() X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO"))) self.model.output_layer.begin_training(X) diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index f8ca28724..e4f7989b8 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -3,7 +3,7 @@ import srsly from ..tokens.doc cimport Doc -from ..util import link_vectors_to_models, create_default_optimizer +from ..util import create_default_optimizer from ..errors import Errors from .. import util @@ -145,8 +145,6 @@ class Pipe: DOCS: https://spacy.io/api/pipe#begin_training """ self.model.initialize() - if hasattr(self, "vocab"): - link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 743ceb32b..568e6031b 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -138,7 +138,6 @@ class SentenceRecognizer(Tagger): """ self.set_output(len(self.labels)) self.model.initialize() - util.link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py index ec7ab6b7a..9b9872b77 100644 --- a/spacy/pipeline/simple_ner.py +++ b/spacy/pipeline/simple_ner.py @@ -168,7 +168,6 @@ class SimpleNER(Pipe): self.model.initialize() if pipeline is not None: self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg) - util.link_vectors_to_models(self.vocab) self.loss_func = SequenceCategoricalCrossentropy( names=self.get_tag_names(), normalize=True, missing_value=None ) diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index c52a7889b..b3f996acb 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -318,7 +318,6 @@ class Tagger(Pipe): self.model.initialize(X=doc_sample) # Get batch of example docs, example outputs to call begin_training(). # This lets the model infer shapes. - util.link_vectors_to_models(self.vocab) if sgd is None: sgd = self.create_optimizer() return sgd diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 2aaa4a769..c235a2594 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -356,7 +356,6 @@ class TextCategorizer(Pipe): docs = [Doc(Vocab(), words=["hello"])] truths, _ = self._examples_to_truth(examples) self.set_output(len(self.labels)) - util.link_vectors_to_models(self.vocab) self.model.initialize(X=docs, Y=truths) if sgd is None: sgd = self.create_optimizer() diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 5caaf432f..5e9e5b40e 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -7,7 +7,7 @@ from ..tokens import Doc from ..vocab import Vocab from ..language import Language from ..errors import Errors -from ..util import link_vectors_to_models, minibatch +from ..util import minibatch default_model_config = """ @@ -198,7 +198,6 @@ class Tok2Vec(Pipe): """ docs = [Doc(self.vocab, words=["hello"])] self.model.initialize(X=docs) - link_vectors_to_models(self.vocab) class Tok2VecListener(Model): diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx index 7acee5efd..eedd84bac 100644 --- a/spacy/syntax/_parser_model.pyx +++ b/spacy/syntax/_parser_model.pyx @@ -21,7 +21,7 @@ from .transition_system cimport Transition from ..compat import copy_array from ..errors import Errors, TempErrors -from ..util import link_vectors_to_models, create_default_optimizer +from ..util import create_default_optimizer from .. import util from . import nonproj diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 5313ec9bd..a0ee13a0a 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -29,7 +29,7 @@ from .stateclass cimport StateClass from ._state cimport StateC from .transition_system cimport Transition -from ..util import link_vectors_to_models, create_default_optimizer, registry +from ..util import create_default_optimizer, registry from ..compat import copy_array from ..errors import Errors, Warnings from .. import util @@ -456,7 +456,6 @@ cdef class Parser: self.model.initialize() if pipeline is not None: self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg) - link_vectors_to_models(self.vocab) return sgd def to_disk(self, path, exclude=tuple()): diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index ac0867189..cf4e402e2 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -9,7 +9,6 @@ from spacy.matcher import Matcher from spacy.tokens import Doc, Span from spacy.vocab import Vocab from spacy.compat import pickle -from spacy.util import link_vectors_to_models import numpy import random @@ -190,7 +189,6 @@ def test_issue2871(): _ = vocab[word] # noqa: F841 vocab.set_vector(word, vector_data[0]) vocab.vectors.name = "dummy_vectors" - link_vectors_to_models(vocab) assert vocab["dog"].rank == 0 assert vocab["cat"].rank == 1 assert vocab["SUFFIX"].rank == 2 diff --git a/spacy/util.py b/spacy/util.py index 898e1c2c3..677f5e8e0 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1200,29 +1200,6 @@ class DummyTokenizer: return self -def link_vectors_to_models( - vocab: "Vocab", - models: List[Model] = [], - *, - vectors_name_attr="vectors_name", - vectors_attr="vectors", - key2row_attr="key2row", - default_vectors_name="spacy_pretrained_vectors", -) -> None: - """Supply vectors data to models.""" - vectors = vocab.vectors - if vectors.name is None: - vectors.name = default_vectors_name - if vectors.data.size != 0: - warnings.warn(Warnings.W020.format(shape=vectors.data.shape)) - - for model in models: - for node in model.walk(): - if node.attrs.get(vectors_name_attr) == vectors.name: - node.attrs[vectors_attr] = Unserializable(vectors.data) - node.attrs[key2row_attr] = Unserializable(vectors.key2row) - - def create_default_optimizer() -> Optimizer: # TODO: Do we still want to allow env_opt? learn_rate = env_opt("learn_rate", 0.001) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index f41ad2356..b7337b92e 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -16,7 +16,7 @@ from .errors import Errors from .lemmatizer import Lemmatizer from .attrs import intify_attrs, NORM, IS_STOP from .vectors import Vectors -from .util import link_vectors_to_models, registry +from .util import registry from .lookups import Lookups, load_lookups from . import util from .lang.norm_exceptions import BASE_NORMS @@ -344,7 +344,6 @@ cdef class Vocab: synonym = self.strings[syn_keys[i][0]] score = scores[i][0] remap[word] = (synonym, score) - link_vectors_to_models(self) return remap def get_vector(self, orth, minn=None, maxn=None): @@ -476,8 +475,6 @@ cdef class Vocab: if "vectors" not in exclude: if self.vectors is not None: self.vectors.from_disk(path, exclude=["strings"]) - if self.vectors.name is not None: - link_vectors_to_models(self) if "lookups" not in exclude: self.lookups.from_disk(path) if "lexeme_norm" in self.lookups: @@ -537,8 +534,6 @@ cdef class Vocab: ) self.length = 0 self._by_orth = PreshMap() - if self.vectors.name is not None: - link_vectors_to_models(self) return self def _reset_cache(self, keys, strings):