Mirror of https://github.com/explosion/spaCy.git
Improve Tensorizer

parent baf7feae68
commit dac3f1b280
@@ -31,7 +31,7 @@ from .matcher import Matcher
 from .matcher import Matcher, PhraseMatcher
 from .tokens.span import Span
-from .attrs import POS
+from .attrs import POS, ID
 from .parts_of_speech import X
 from ._ml import Tok2Vec, build_text_classifier, build_tagger_model
 from ._ml import link_vectors_to_models, zero_init, flatten
@@ -434,7 +434,7 @@ class Tensorizer(Pipe):
     name = 'tensorizer'

     @classmethod
-    def Model(cls, output_size=300, input_size=128, **cfg):
+    def Model(cls, output_size=300, **cfg):
         """Create a new statistical model for the class.

         width (int): Output size of the model.
@@ -442,6 +442,7 @@ class Tensorizer(Pipe):
         **cfg: Config parameters.
         RETURNS (Model): A `thinc.neural.Model` or similar instance.
         """
+        input_size = util.env_opt('token_vector_width', cfg.get('input_size', 128))
         return zero_init(Affine(output_size, input_size))

     def __init__(self, vocab, model=True, **cfg):
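With this change, Model() no longer hard-codes input_size as a keyword argument: the input width is resolved at call time, with the token_vector_width environment option (via util.env_opt) taking precedence over cfg['input_size'] and 128 as the final fallback. Below is a minimal standalone sketch of that precedence, not spaCy's code; the environment variable name and the numpy stand-in for zero_init(Affine(...)) are assumptions for illustration only.

# Minimal sketch, not spaCy's implementation.
import os
import numpy

def resolve_input_size(cfg, default=128):
    # Mirrors the intent of util.env_opt('token_vector_width', cfg.get('input_size', 128)):
    # an environment override wins, otherwise fall back to the cfg value / default.
    env_value = os.environ.get('SPACY_TOKEN_VECTOR_WIDTH')  # assumed variable name
    if env_value is not None:
        return int(env_value)
    return cfg.get('input_size', default)

def zero_init_affine(output_size, input_size):
    # Stand-in for zero_init(Affine(output_size, input_size)): a linear layer
    # whose weights and bias start at zero.
    W = numpy.zeros((output_size, input_size), dtype='float32')
    b = numpy.zeros((output_size,), dtype='float32')
    return W, b

W, b = zero_init_affine(300, resolve_input_size({'input_size': 96}))
print(W.shape)  # (300, 96)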
@@ -540,12 +541,8 @@ class Tensorizer(Pipe):
         return loss

     def get_loss(self, docs, golds, prediction):
-        target = []
-        i = 0
-        for doc in docs:
-            vectors = self.model.ops.xp.vstack([w.vector for w in doc])
-            target.append(vectors)
-        target = self.model.ops.xp.vstack(target)
+        ids = self.model.ops.flatten([doc.to_array(ID).ravel() for doc in docs])
+        target = self.vocab.vectors.data[ids]
         d_scores = (prediction - target)
         loss = (d_scores**2).sum()
         return loss, d_scores
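The rewritten get_loss() no longer rebuilds the target matrix token by token with xp.vstack: it gathers the ID attribute for every token in the batch and indexes the vocab's vector table in one shot, then scores the prediction with a summed squared error whose gradient is simply prediction - target. A minimal numpy sketch of the same computation follows; the random arrays stand in for vocab.vectors.data and for the tensorizer's output and are illustrative only.

# Illustrative only: random data replaces the real vector table and predictions.
import numpy

vectors_table = numpy.random.rand(1000, 300).astype('float32')  # plays the role of vocab.vectors.data
doc_ids = [numpy.array([3, 17, 42]), numpy.array([8, 99])]       # per-doc output of doc.to_array(ID)

ids = numpy.concatenate([arr.ravel() for arr in doc_ids])        # ops.flatten(...) over the batch
target = vectors_table[ids]                                      # one target row per token
prediction = numpy.random.rand(len(ids), 300).astype('float32')  # the tensorizer's predicted vectors

d_scores = prediction - target   # gradient of the squared-error loss
loss = (d_scores ** 2).sum()
print(loss, d_scores.shape)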