mirror of https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00

Clean up link_vectors_to_models unused stuff

parent 0c17ea4c85
commit 1784c95827
@@ -11,7 +11,6 @@ from ...util import ensure_path, working_dir
 from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
-
 
 
 # TODO: find a solution for caches
 # CACHES = [
 #     Path.home() / ".torch",
@@ -21,7 +21,7 @@ from .vocab import Vocab, create_vocab
 from .pipe_analysis import analyze_pipes, analyze_all_pipes, validate_attrs
 from .gold import Example
 from .scorer import Scorer
-from .util import link_vectors_to_models, create_default_optimizer, registry
+from .util import create_default_optimizer, registry
 from .util import SimpleFrozenDict, combine_score_weights
 from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
 from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES

@@ -1049,7 +1049,6 @@ class Language:
         if self.vocab.vectors.data.shape[1] >= 1:
             ops = get_current_ops()
             self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-        link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = create_default_optimizer()
         self._optimizer = sgd

@@ -1082,7 +1081,6 @@ class Language:
         ops = get_current_ops()
         if self.vocab.vectors.data.shape[1] >= 1:
             self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
-        link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = create_default_optimizer()
         self._optimizer = sgd
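Note: with the link_vectors_to_models call gone, the vector setup in Language.begin_training and resume_training reduces to moving the vectors table onto the active backend and ensuring an optimizer exists. A minimal sketch of the surviving logic as a standalone function; the function name is illustrative, and create_default_optimizer is assumed to live in spacy.util as it does at this commit:

from thinc.api import get_current_ops
from spacy.util import create_default_optimizer

def setup_vectors_and_optimizer(vocab, sgd=None):
    # Move the vectors table to the current ops backend (CPU or GPU).
    if vocab.vectors.data.shape[1] >= 1:
        ops = get_current_ops()
        vocab.vectors.data = ops.asarray(vocab.vectors.data)
    # Fall back to the default optimizer when none is supplied.
    if sgd is None:
        sgd = create_default_optimizer()
    return sgd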
@@ -149,7 +149,6 @@ class Morphologizer(Tagger):
                 self.cfg["labels_pos"][norm_label] = POS_IDS[pos]
         self.set_output(len(self.labels))
         self.model.initialize()
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -11,7 +11,6 @@ from .tagger import Tagger
 from ..language import Language
 from ..syntax import nonproj
 from ..attrs import POS, ID
-from ..util import link_vectors_to_models
 from ..errors import Errors
 
 

@@ -91,7 +90,6 @@ class MultitaskObjective(Tagger):
             if label is not None and label not in self.labels:
                 self.labels[label] = len(self.labels)
         self.model.initialize()
-        link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd

@@ -179,7 +177,6 @@ class ClozeMultitask(Pipe):
         pass
 
     def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
-        link_vectors_to_models(self.vocab)
         self.model.initialize()
         X = self.model.ops.alloc((5, self.model.get_ref("tok2vec").get_dim("nO")))
         self.model.output_layer.begin_training(X)
@@ -3,7 +3,7 @@ import srsly
 
 from ..tokens.doc cimport Doc
 
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
 from ..errors import Errors
 from .. import util
 

@@ -145,8 +145,6 @@ class Pipe:
         DOCS: https://spacy.io/api/pipe#begin_training
         """
         self.model.initialize()
-        if hasattr(self, "vocab"):
-            link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
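Note: with the hasattr(self, "vocab") branch removed, the base Pipe.begin_training is just model initialization plus optimizer creation. A hedged sketch of the resulting method body, mirroring the context lines above (the signature follows this commit's ClozeMultitask hunk):

class Pipe:
    def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None):
        # Initialize the Thinc model; vectors are no longer linked here.
        self.model.initialize()
        if sgd is None:
            sgd = self.create_optimizer()
        return sgd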
@@ -138,7 +138,6 @@ class SentenceRecognizer(Tagger):
         """
         self.set_output(len(self.labels))
         self.model.initialize()
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -168,7 +168,6 @@ class SimpleNER(Pipe):
         self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-        util.link_vectors_to_models(self.vocab)
         self.loss_func = SequenceCategoricalCrossentropy(
             names=self.get_tag_names(), normalize=True, missing_value=None
         )
@@ -318,7 +318,6 @@ class Tagger(Pipe):
         self.model.initialize(X=doc_sample)
         # Get batch of example docs, example outputs to call begin_training().
         # This lets the model infer shapes.
-        util.link_vectors_to_models(self.vocab)
         if sgd is None:
             sgd = self.create_optimizer()
         return sgd
@@ -356,7 +356,6 @@ class TextCategorizer(Pipe):
         docs = [Doc(Vocab(), words=["hello"])]
         truths, _ = self._examples_to_truth(examples)
         self.set_output(len(self.labels))
-        util.link_vectors_to_models(self.vocab)
         self.model.initialize(X=docs, Y=truths)
         if sgd is None:
             sgd = self.create_optimizer()
@@ -7,7 +7,7 @@ from ..tokens import Doc
 from ..vocab import Vocab
 from ..language import Language
 from ..errors import Errors
-from ..util import link_vectors_to_models, minibatch
+from ..util import minibatch
 
 
 default_model_config = """

@@ -198,7 +198,6 @@ class Tok2Vec(Pipe):
         """
         docs = [Doc(self.vocab, words=["hello"])]
         self.model.initialize(X=docs)
-        link_vectors_to_models(self.vocab)
 
 
 class Tok2VecListener(Model):
@@ -21,7 +21,7 @@ from .transition_system cimport Transition
 
 from ..compat import copy_array
 from ..errors import Errors, TempErrors
-from ..util import link_vectors_to_models, create_default_optimizer
+from ..util import create_default_optimizer
 from .. import util
 from . import nonproj
 
@@ -29,7 +29,7 @@ from .stateclass cimport StateClass
 from ._state cimport StateC
 from .transition_system cimport Transition
 
-from ..util import link_vectors_to_models, create_default_optimizer, registry
+from ..util import create_default_optimizer, registry
 from ..compat import copy_array
 from ..errors import Errors, Warnings
 from .. import util

@@ -456,7 +456,6 @@ cdef class Parser:
         self.model.initialize()
         if pipeline is not None:
             self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-        link_vectors_to_models(self.vocab)
         return sgd
 
     def to_disk(self, path, exclude=tuple()):
@@ -9,7 +9,6 @@ from spacy.matcher import Matcher
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
 from spacy.compat import pickle
-from spacy.util import link_vectors_to_models
 import numpy
 import random
 

@@ -190,7 +189,6 @@ def test_issue2871():
         _ = vocab[word]  # noqa: F841
         vocab.set_vector(word, vector_data[0])
     vocab.vectors.name = "dummy_vectors"
-    link_vectors_to_models(vocab)
     assert vocab["dog"].rank == 0
     assert vocab["cat"].rank == 1
     assert vocab["SUFFIX"].rank == 2
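Note: a hedged, condensed version of test_issue2871 as it reads after this change; it should still pass because ranks come from insertion order into the vectors table, with no linking step required. The test name is illustrative, and the resize call is an assumption about the elided setup above the hunk:

import numpy
from spacy.vocab import Vocab

def test_vector_ranks_without_linking():
    words = ["dog", "cat", "SUFFIX"]
    vocab = Vocab()
    vocab.vectors.resize(shape=(3, 10))
    vector_data = numpy.zeros((3, 10), dtype="f")
    for word in words:
        _ = vocab[word]  # noqa: F841
        vocab.set_vector(word, vector_data[0])
    vocab.vectors.name = "dummy_vectors"
    assert vocab["dog"].rank == 0
    assert vocab["cat"].rank == 1
    assert vocab["SUFFIX"].rank == 2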
@@ -1200,29 +1200,6 @@ class DummyTokenizer:
         return self
 
 
-def link_vectors_to_models(
-    vocab: "Vocab",
-    models: List[Model] = [],
-    *,
-    vectors_name_attr="vectors_name",
-    vectors_attr="vectors",
-    key2row_attr="key2row",
-    default_vectors_name="spacy_pretrained_vectors",
-) -> None:
-    """Supply vectors data to models."""
-    vectors = vocab.vectors
-    if vectors.name is None:
-        vectors.name = default_vectors_name
-        if vectors.data.size != 0:
-            warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
-
-    for model in models:
-        for node in model.walk():
-            if node.attrs.get(vectors_name_attr) == vectors.name:
-                node.attrs[vectors_attr] = Unserializable(vectors.data)
-                node.attrs[key2row_attr] = Unserializable(vectors.key2row)
-
-
 def create_default_optimizer() -> Optimizer:
     # TODO: Do we still want to allow env_opt?
     learn_rate = env_opt("learn_rate", 0.001)
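Note: for reference, roughly how the deleted helper was driven before this commit. The bare Thinc node below is a stand-in assumption for a real embedding layer tagged with a matching "vectors_name" attribute; the attrs assignments match the removed function body above:

from thinc.api import Model
from spacy.vocab import Vocab
from spacy.util import link_vectors_to_models  # removed by this commit

vocab = Vocab()
vocab.vectors.resize(shape=(2, 4))
vocab.vectors.name = "demo_vectors"

# A no-op node tagged the way the helper expects (forward just echoes X).
node = Model("demo", lambda model, X, is_train: (X, lambda dY: dY))
node.attrs["vectors_name"] = "demo_vectors"

link_vectors_to_models(vocab, [node])
# The node now carries the vectors table and key2row mapping, wrapped
# in Unserializable so they are excluded from model serialization.
assert "vectors" in node.attrs and "key2row" in node.attrs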
@@ -16,7 +16,7 @@ from .errors import Errors
 from .lemmatizer import Lemmatizer
 from .attrs import intify_attrs, NORM, IS_STOP
 from .vectors import Vectors
-from .util import link_vectors_to_models, registry
+from .util import registry
 from .lookups import Lookups, load_lookups
 from . import util
 from .lang.norm_exceptions import BASE_NORMS

@@ -344,7 +344,6 @@ cdef class Vocab:
             synonym = self.strings[syn_keys[i][0]]
             score = scores[i][0]
             remap[word] = (synonym, score)
-        link_vectors_to_models(self)
         return remap
 
     def get_vector(self, orth, minn=None, maxn=None):

@@ -476,8 +475,6 @@ cdef class Vocab:
         if "vectors" not in exclude:
             if self.vectors is not None:
                 self.vectors.from_disk(path, exclude=["strings"])
-                if self.vectors.name is not None:
-                    link_vectors_to_models(self)
         if "lookups" not in exclude:
             self.lookups.from_disk(path)
         if "lexeme_norm" in self.lookups:

@@ -537,8 +534,6 @@ cdef class Vocab:
         )
         self.length = 0
         self._by_orth = PreshMap()
-        if self.vectors.name is not None:
-            link_vectors_to_models(self)
         return self
 
     def _reset_cache(self, keys, strings):