Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-02 09:56:39 +03:00
Update PyTorch section
This commit is contained in:
parent 47969e306b
commit 29bbe70926
@@ -9,83 +9,8 @@
     | to create spaCy pipeline components, to add annotations to the
     | #[code Doc] object.

++under-construction
+
-p
-    | Here's how a #[code begin_update] function that wraps an arbitrary
-    | PyTorch model would look:
-
-+code.
-    class PytorchWrapper(thinc.neural.Model):
-        def __init__(self, pytorch_model):
-            self.pytorch_model = pytorch_model
-
-        def begin_update(self, x_data, drop=0.):
-            x_var = Variable(x_data)
-            # Make prediction
-            y_var = self.pytorch_model.forward(x_var)
-            def backward(dy_data, sgd=None):
-                dy_var = Variable(dy_data)
-                dx_var = torch.autograd.backward(x_var, dy_var)
-                return dx_var
-            return y_var.data, backward
++code.
+    from thinc.extra.wrappers import PyTorchWrapper
+    model = PyTorchWrapper(YOUR_PYTORCH_MODEL)
+
++github("spacy", "examples/training/train_pytorch_textcat.py")
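The new snippet shows only the constructor call, so here is a rough usage sketch (not part of the commit) of how the wrapped model can be driven through thinc's Model API, assuming thinc.extra.wrappers.PyTorchWrapper converts between numpy arrays and torch tensors the way the removed begin_update example did; the nn.Linear stand-in and the gradient values are purely illustrative:

    import numpy
    from torch import nn
    from thinc.extra.wrappers import PyTorchWrapper

    pytorch_model = nn.Linear(2, 2)        # stand-in for YOUR_PYTORCH_MODEL
    model = PyTorchWrapper(pytorch_model)

    X = numpy.ones((5, 2), dtype='f')
    Y, backprop = model.begin_update(X)    # forward pass: output plus callback
    d_Y = numpy.ones_like(Y)               # placeholder gradient of the loss w.r.t. Y
    d_X = backprop(d_Y)                    # backward pass: gradient w.r.t. X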
-
-p
-    | PyTorch requires data to be wrapped in a container, #[code Variable],
-    | that tracks the operations performed on the data. This "tape" of
-    | operations is then used by #[code torch.autograd.backward] to compute the
-    | gradient with respect to the input. For example, the following code
-    | constructs a PyTorch Linear layer that takes an input of shape
-    | #[code (length, 2)], multiplies it by a #[code (2, 2)] matrix of weights,
-    | adds a #[code (2,)] bias, and returns the resulting #[code (length, 2)]
-    | output:
-
-+code("PyTorch Linear").
-    from torch import autograd
-    from torch import nn
-    import torch
-    import numpy
-
-    pt_model = nn.Linear(2, 2)
-    length = 5
-
-    input_data = numpy.ones((5, 2), dtype='f')
-    input_var = autograd.Variable(torch.Tensor(input_data))
-
-    output_var = pt_model(input_var)
-    output_data = output_var.data.numpy()
-
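To make the shapes in that paragraph concrete, the layer's output can be reproduced by hand, since nn.Linear computes x·Wᵀ + b. This small check continues the snippet above; the names W, b and manual are illustrative, not from the original:

    W = pt_model.weight.data.numpy()     # (2, 2) weight matrix
    b = pt_model.bias.data.numpy()       # (2,) bias
    manual = input_data.dot(W.T) + b     # x · Wᵀ + b, shape (length, 2)
    assert numpy.allclose(manual, output_data, atol=1e-5)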
-p
-    | Given target values we would like the output data to approximate, we can
-    | then "learn" values of the parameters within #[code pt_model], to give us
-    | output that's closer to our target. As a trivial example, let's make the
-    | linear layer compute the negative inverse of the input:
-
-+code.
-    def get_target(input_data):
-        return -(1 / input_data)
-
-p
-    | To update the PyTorch model, we create an optimizer and give it
-    | references to the model's parameters. We'll then randomly generate input
-    | data and get the target result we'd like the function to produce. We then
-    | compute the #[strong gradient of the error] between the current output
-    | and the target. Using the most popular definition of "error", this is
-    | simply the average difference:
-
-+code.
-    from torch import optim
-
-    optimizer = optim.SGD(pt_model.parameters(), lr=0.01)
-    for i in range(10):
-        input_data = numpy.random.uniform(-1., 1., (length, 2))
-        target = -(1 / input_data)
-
-        output_var = pt_model(autograd.Variable(torch.Tensor(input_data)))
-        output_data = output_var.data.numpy()
-
-        d_output_data = (output_data - target) / length
-        d_output_var = autograd.Variable(torch.Tensor(d_output_data))
-
-        d_input_var = torch.autograd.backward(output_var, d_output_var)
-        optimizer.step()
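Note that torch.autograd.backward returns None and only accumulates gradients on leaf Variables, so d_input_var above does not actually hold the input gradient. A sketch (not part of the commit) of how the same loop is usually written against that era's Variable API, reusing pt_model, optimizer and length from the block above:

    for i in range(10):
        input_data = numpy.random.uniform(-1., 1., (length, 2))
        target = -(1 / input_data)

        input_var = autograd.Variable(torch.Tensor(input_data), requires_grad=True)
        output_var = pt_model(input_var)
        output_data = output_var.data.numpy()

        d_output_data = (output_data - target) / length

        optimizer.zero_grad()                            # clear gradients from the last step
        output_var.backward(torch.Tensor(d_output_data)) # push the output gradient back
        optimizer.step()                                 # apply the SGD update
        d_input = input_var.grad.data.numpy()            # gradient w.r.t. the input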