Clean up link_vectors_to_models unused stuff

parent 0c17ea4c85
commit 1784c95827
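This commit removes the unused link_vectors_to_models helper from spacy.util
and drops every remaining call site: Language.begin_training and
Language.resume_training, the begin_training methods of the pipeline
components (Morphologizer, MultitaskObjective, ClozeMultitask, Pipe,
SentenceRecognizer, SimpleNER, Tagger, TextCategorizer, Tok2Vec), the parser,
Vocab deserialization, and one regression test. A minimal sketch of the
before/after call pattern, condensed from the Pipe hunk below (surrounding
code elided):

    # Before this commit: begin_training wired the vocab's vectors into the
    # component's model via the util helper right after initialization.
    def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
        self.model.initialize()
        if hasattr(self, "vocab"):
            link_vectors_to_models(self.vocab)  # removed in this commit
        if sgd is None:
            sgd = self.create_optimizer()
        return sgd

    # After this commit: model.initialize() alone is enough; no vector linking.
    def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
        self.model.initialize()
        if sgd is None:
            sgd = self.create_optimizer()
        return sgd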
@@ -11,7 +11,6 @@ from ...util import ensure_path, working_dir
 from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
 
 
-
 # TODO: find a solution for caches
 # CACHES = [
 #     Path.home() / ".torch",
@@ -21,7 +21,7 @@ from .vocab import Vocab, create_vocab
 from .pipe_analysis import analyze_pipes, analyze_all_pipes, validate_attrs
 from .gold import Example
 from .scorer import Scorer
-from .util import link_vectors_to_models, create_default_optimizer, registry
+from .util import create_default_optimizer, registry
 from .util import SimpleFrozenDict, combine_score_weights
 from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
 from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
@@ -1049,7 +1049,6 @@ class Language:
         if self.vocab.vectors.data.shape[1] >= 1:
             ops = get_current_ops()
             self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-            link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = create_default_optimizer()
         self._optimizer = sgd
@@ -1082,7 +1081,6 @@ class Language:
         ops = get_current_ops()
         if self.vocab.vectors.data.shape[1] >= 1:
             self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-            link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = create_default_optimizer()
         self._optimizer = sgd
@@ -149,7 +149,6 @@ class Morphologizer(Tagger):
                     self.cfg["labels_pos"][norm_label] = POS_IDS[pos]
         self.set_output(len(self.labels))
         self.model.initialize()
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -11,7 +11,6 @@ from .tagger import Tagger
 from ..language import Language
 from ..syntax import nonproj
 from ..attrs import POS, ID
-from ..util import link_vectors_to_models
 from ..errors import Errors
 
 
@@ -91,7 +90,6 @@ class MultitaskObjective(Tagger):
             if label is not None and label not in self.labels:
                 self.labels[label] = len(self.labels)
         self.model.initialize()
-        link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -179,7 +177,6 @@ class ClozeMultitask(Pipe):
         pass
 
     def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
-        link_vectors_to_models(self.vocab)
         self.model.initialize()
         X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
         self.model.output_layer.begin_training(X)
@@ -3,7 +3,7 @@ import srsly
 
 from ..tokens.doc cimport Doc
 
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
 from ..errors import Errors
 from .. import util
 
@@ -145,8 +145,6 @@ class Pipe:
         DOCS: https://spacy.io/api/pipe#begin_training
         """
         self.model.initialize()
-        if hasattr(self, "vocab"):
-            link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -138,7 +138,6 @@ class SentenceRecognizer(Tagger):
         """
         self.set_output(len(self.labels))
         self.model.initialize()
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -168,7 +168,6 @@ class SimpleNER(Pipe):
         self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-        util.link_vectors_to_models(self.vocab)
         self.loss_func = SequenceCategoricalCrossentropy(
             names=self.get_tag_names(), normalize=True, missing_value=None
         )
@@ -318,7 +318,6 @@ class Tagger(Pipe):
         self.model.initialize(X=doc_sample)
         # Get batch of example docs, example outputs to call begin_training().
         # This lets the model infer shapes.
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -356,7 +356,6 @@ class TextCategorizer(Pipe):
         docs = [Doc(Vocab(), words=["hello"])]
         truths, _ = self._examples_to_truth(examples)
         self.set_output(len(self.labels))
-        util.link_vectors_to_models(self.vocab)
         self.model.initialize(X=docs, Y=truths)
         if sgd is None:
             sgd = self.create_optimizer()
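The pattern this hunk leaves untouched (here and in the Tagger hunk above) is
Thinc's shape inference: begin_training passes sample inputs and outputs to
model.initialize so the model can infer its dimensions. A minimal standalone
sketch of that mechanism, assuming Thinc v8; the Linear model and the sample
shapes are illustrative, not taken from the diff:

    import numpy
    from thinc.api import Linear

    model = Linear()                    # nI and nO deliberately left unset
    X = numpy.zeros((4, 5), dtype="f")  # sample batch: 4 rows, 5 features
    Y = numpy.zeros((4, 2), dtype="f")  # sample outputs: 2 classes
    model.initialize(X=X, Y=Y)          # dims are inferred from the samples
    assert model.get_dim("nI") == 5
    assert model.get_dim("nO") == 2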
@@ -7,7 +7,7 @@ from ..tokens import Doc
 from ..vocab import Vocab
 from ..language import Language
 from ..errors import Errors
-from ..util import link_vectors_to_models, minibatch
+from ..util import minibatch
 
 
 default_model_config = """
@@ -198,7 +198,6 @@ class Tok2Vec(Pipe):
         """
         docs = [Doc(self.vocab, words=["hello"])]
         self.model.initialize(X=docs)
-        link_vectors_to_models(self.vocab)
 
 
 class Tok2VecListener(Model):
@@ -21,7 +21,7 @@ from .transition_system cimport Transition
 
 from ..compat import copy_array
 from ..errors import Errors, TempErrors
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
 from .. import util
 from . import nonproj
 
@@ -29,7 +29,7 @@ from .stateclass cimport StateClass
 from ._state cimport StateC
 from .transition_system cimport Transition
 
-from ..util import link_vectors_to_models, create_default_optimizer, registry
+from ..util import create_default_optimizer, registry
 from ..compat import copy_array
 from ..errors import Errors, Warnings
 from .. import util
@@ -456,7 +456,6 @@ cdef class Parser:
         self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-        link_vectors_to_models(self.vocab)
         return sgd
 
     def to_disk(self, path, exclude=tuple()):
@@ -9,7 +9,6 @@ from spacy.matcher import Matcher
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
 from spacy.compat import pickle
-from spacy.util import link_vectors_to_models
 import numpy
 import random
 
@@ -190,7 +189,6 @@ def test_issue2871():
         _ = vocab[word]  # noqa: F841
         vocab.set_vector(word, vector_data[0])
     vocab.vectors.name = "dummy_vectors"
-    link_vectors_to_models(vocab)
     assert vocab["dog"].rank == 0
     assert vocab["cat"].rank == 1
     assert vocab["SUFFIX"].rank == 2
@@ -1200,29 +1200,6 @@ class DummyTokenizer:
         return self
 
 
-def link_vectors_to_models(
-    vocab: "Vocab",
-    models: List[Model] = [],
-    *,
-    vectors_name_attr="vectors_name",
-    vectors_attr="vectors",
-    key2row_attr="key2row",
-    default_vectors_name="spacy_pretrained_vectors",
-) -> None:
-    """Supply vectors data to models."""
-    vectors = vocab.vectors
-    if vectors.name is None:
-        vectors.name = default_vectors_name
-        if vectors.data.size != 0:
-            warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
-
-    for model in models:
-        for node in model.walk():
-            if node.attrs.get(vectors_name_attr) == vectors.name:
-                node.attrs[vectors_attr] = Unserializable(vectors.data)
-                node.attrs[key2row_attr] = Unserializable(vectors.key2row)
-
-
 def create_default_optimizer() -> Optimizer:
     # TODO: Do we still want to allow env_opt?
     learn_rate = env_opt("learn_rate", 0.001)
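For context, the helper deleted above worked by walking a Thinc model graph
and stashing the vectors table on every node that declared a matching
vectors-name attr, wrapped so the data would be skipped during serialization.
The core of that mechanism, restated from the deleted lines with the default
parameter values filled in (Unserializable is the wrapper referenced by the
deleted code; this is no longer runnable after this commit):

    # Condensed from the removed function body above: attach the vocab's
    # vectors to every matching node in each model's graph. The Unserializable
    # wrapper keeps the (potentially large) vectors table out of the model's
    # serialized bytes.
    for model in models:
        for node in model.walk():
            if node.attrs.get("vectors_name") == vectors.name:
                node.attrs["vectors"] = Unserializable(vectors.data)
                node.attrs["key2row"] = Unserializable(vectors.key2row)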
@@ -16,7 +16,7 @@ from .errors import Errors
 from .lemmatizer import Lemmatizer
 from .attrs import intify_attrs, NORM, IS_STOP
 from .vectors import Vectors
-from .util import link_vectors_to_models, registry
+from .util import registry
 from .lookups import Lookups, load_lookups
 from . import util
 from .lang.norm_exceptions import BASE_NORMS
@@ -344,7 +344,6 @@ cdef class Vocab:
             synonym = self.strings[syn_keys[i][0]]
             score = scores[i][0]
             remap[word] = (synonym, score)
-        link_vectors_to_models(self)
         return remap
 
     def get_vector(self, orth, minn=None, maxn=None):
@@ -476,8 +475,6 @@ cdef class Vocab:
         if "vectors" not in exclude:
             if self.vectors is not None:
                 self.vectors.from_disk(path, exclude=["strings"])
-                if self.vectors.name is not None:
-                    link_vectors_to_models(self)
         if "lookups" not in exclude:
             self.lookups.from_disk(path)
             if "lexeme_norm" in self.lookups:
@@ -537,8 +534,6 @@ cdef class Vocab:
         )
         self.length = 0
         self._by_orth = PreshMap()
-        if self.vectors.name is not None:
-            link_vectors_to_models(self)
         return self
 
     def _reset_cache(self, keys, strings):