diff --git a/website/usage/_deep-learning/_pytorch.jade b/website/usage/_deep-learning/_pytorch.jade
index cf0f692f9..e3b2fee7b 100644
--- a/website/usage/_deep-learning/_pytorch.jade
+++ b/website/usage/_deep-learning/_pytorch.jade
@@ -9,83 +9,8 @@
     |  to create spaCy pipeline components, to add annotations to the
     |  #[code Doc] object.
 
-+under-construction
-
-p
-    |  Here's how a #[code begin_update] function that wraps an arbitrary
-    |  PyTorch model would look:
-
 +code.
-    class PytorchWrapper(thinc.neural.Model):
-        def __init__(self, pytorch_model):
-            self.pytorch_model = pytorch_model
+    from thinc.extra.wrappers import PyTorchWrapper
+    model = PyTorchWrapper(YOUR_PYTORCH_MODEL)
 
-        def begin_update(self, x_data, drop=0.):
-            x_var = Variable(x_data)
-            # Make prediction
-            y_var = pytorch_model.forward(x_var)
-            def backward(dy_data, sgd=None):
-                dy_var = Variable(dy_data)
-                dx_var = torch.autograd.backward(x_var, dy_var)
-                return dx_var
-            return y_var.data, backward
-
-p
-    |  PyTorch requires data to be wrapped in a container, #[code Variable],
-    |  that tracks the operations performed on the data. This "tape" of
-    |  operations is then used by #[code torch.autograd.backward] to compute the
-    |  gradient with respect to the input. For example, the following code
-    |  constructs a PyTorch Linear layer that takes a vector of shape
-    |  #[code (length, 2)], multiples it by a #[code (2, 2)] matrix of weights,
-    |  adds a #[code (2,)] bias, and returns the resulting #[code (length, 2)]
-    |  vector:
-
-+code("PyTorch Linear").
-    from torch import autograd
-    from torch import nn
-    import torch
-    import numpy
-
-    pt_model = nn.Linear(2, 2)
-    length = 5
-
-    input_data = numpy.ones((5, 2), dtype='f')
-    input_var = autograd.Variable(torch.Tensor(input_data))
-
-    output_var = pt_model(input_var)
-    output_data = output_var.data.numpy()
-
-p
-    |  Given target values we would like the output data to approximate, we can
-    |  then "learn" values of the parameters within #[code pt_model], to give us
-    |  output that's closer to our target. As a trivial example, let's make the
-    |  linear layer compute the negative inverse of the input:
-
-+code.
-    def get_target(input_data):
-        return -(1 / input_data)
-
-p
-    |  To update the PyTorch model, we create an optimizer and give it
-    |  references to the model's parameters. We'll then randomly generate input
-    |  data and get the target result we'd like the function to produce. We then
-    |  compute the #[strong gradient of the error] between the current output
-    |  and the target. Using the most popular definition of "error", this is
-    |  simply the average difference:
-
-+code.
-    from torch import optim
-
-    optimizer = optim.SGD(pt_model.parameters(), lr = 0.01)
-    for i in range(10):
-        input_data = numpy.random.uniform(-1., 1., (length, 2))
-        target = -(1 / input_data)
-
-        output_var = pt_model(autograd.Variable(torch.Tensor(input_data)))
-        output_data = output_var.data.numpy()
-
-        d_output_data = (output_data - target) / length
-        d_output_var = autograd.Variable(torch.Tensor(d_output_data))
-
-        d_input_var = torch.autograg.backward(output_var, d_output_var)
-        optimizer.step()
++github("spacy", "examples/training/train_pytorch_textcat.py")
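
For context, here is a minimal sketch of how the new thinc.extra.wrappers.PyTorchWrapper referenced above would be used. The numpy-in/numpy-out behaviour and the begin_update call are assumptions carried over from the hand-rolled wrapper this diff removes; the toy torch.nn.Linear module is only illustrative. See the linked examples/training/train_pytorch_textcat.py for the full pipeline integration.

    # Sketch only: wrap a small PyTorch module with thinc's PyTorchWrapper
    # and run one forward/backward pass. Exact I/O types are assumed, not
    # guaranteed by this change.
    import numpy
    import torch.nn
    from thinc.extra.wrappers import PyTorchWrapper

    pytorch_model = torch.nn.Linear(2, 2)   # any nn.Module in place of YOUR_PYTORCH_MODEL
    model = PyTorchWrapper(pytorch_model)

    X = numpy.zeros((5, 2), dtype="f")
    Y, backprop = model.begin_update(X)      # forward pass through the PyTorch model
    dX = backprop(numpy.ones_like(Y))        # backward pass, gradient w.r.t. the input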