mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 21:21:10 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			92 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			92 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- 💫 DOCS > USAGE > DEEP LEARNING > PYTORCH
 | |
| 
 | |
| +infobox
 | |
|     +infobox-logos(["pytorch", 100, 48, "http://pytorch.org"])
 | |
|     |  #[strong PyTorch] is a dynamic neural network library, which can be much
 | |
|     |  easier to work with for NLP. Outside of Google, there's a general shift
 | |
|     |  among NLP researchers to both PyTorch and DyNet. spaCy is the front-end
 | |
|     |  of choice for PyTorch's #[code torch.text] extension. You can use PyTorch
 | |
|     |  to create spaCy pipeline components, to add annotations to the
 | |
|     |  #[code Doc] object.
 | |
| 
 | |
| +under-construction
 | |
| 
 | |
| p
 | |
|     |  Here's how a #[code begin_update] function that wraps an arbitrary
 | |
|     |  PyTorch model would look:
 | |
| 
 | |
| +code.
 | |
|     class PytorchWrapper(thinc.neural.Model):
 | |
|         def __init__(self, pytorch_model):
 | |
|             self.pytorch_model = pytorch_model
 | |
| 
 | |
|         def begin_update(self, x_data, drop=0.):
 | |
|             x_var = Variable(x_data)
 | |
|             # Make prediction
 | |
|             y_var = self.pytorch_model.forward(x_var)
 | |
|             def backward(dy_data, sgd=None):
 | |
|                 dy_var = Variable(dy_data)
 | |
|                 dx_var = torch.autograd.backward(x_var, dy_var)
 | |
|                 return dx_var
 | |
|             return y_var.data, backward
 | |
| 
 | |
| p
 | |
|     |  PyTorch requires data to be wrapped in a container, #[code Variable],
 | |
|     |  that tracks the operations performed on the data. This "tape" of
 | |
|     |  operations is then used by #[code torch.autograd.backward] to compute the
 | |
|     |  gradient with respect to the input. For example, the following code
 | |
|     |  constructs a PyTorch Linear layer that takes a vector of shape
 | |
|     |  #[code (length, 2)], multiplies it by a #[code (2, 2)] matrix of weights,
 | |
|     |  adds a #[code (2,)] bias, and returns the resulting #[code (length, 2)]
 | |
|     |  vector:
 | |
| 
 | |
| +code("PyTorch Linear").
 | |
|     from torch import autograd
 | |
|     from torch import nn
 | |
|     import torch
 | |
|     import numpy
 | |
| 
 | |
|     pt_model = nn.Linear(2, 2)
 | |
|     length = 5
 | |
| 
 | |
|     input_data = numpy.ones((5, 2), dtype='f')
 | |
|     input_var = autograd.Variable(torch.Tensor(input_data))
 | |
| 
 | |
|     output_var = pt_model(input_var)
 | |
|     output_data = output_var.data.numpy()
 | |
| 
 | |
| p
 | |
|     |  Given target values we would like the output data to approximate, we can
 | |
|     |  then "learn" values of the parameters within #[code pt_model], to give us
 | |
|     |  output that's closer to our target. As a trivial example, let's make the
 | |
|     |  linear layer compute the negative inverse of the input:
 | |
| 
 | |
| +code.
 | |
|     def get_target(input_data):
 | |
|         return -(1 / input_data)
 | |
| 
 | |
| p
 | |
|     |  To update the PyTorch model, we create an optimizer and give it
 | |
|     |  references to the model's parameters. We'll then randomly generate input
 | |
|     |  data and get the target result we'd like the function to produce. We then
 | |
|     |  compute the #[strong gradient of the error] between the current output
 | |
|     |  and the target. Using the most popular definition of "error", this is
 | |
|     |  simply the average difference:
 | |
| 
 | |
| +code.
 | |
|     from torch import optim
 | |
| 
 | |
|     optimizer = optim.SGD(pt_model.parameters(), lr = 0.01)
 | |
|     for i in range(10):
 | |
|         input_data = numpy.random.uniform(-1., 1., (length, 2))
 | |
|         target = -(1 / input_data)
 | |
| 
 | |
|         output_var = pt_model(autograd.Variable(torch.Tensor(input_data)))
 | |
|         output_data = output_var.data.numpy()
 | |
| 
 | |
|         d_output_data = (output_data - target) / length
 | |
|         d_output_var = autograd.Variable(torch.Tensor(d_output_data))
 | |
| 
 | |
|         d_input_var = torch.autograd.backward(output_var, d_output_var)
 | |
|         optimizer.step()
 |