# pytorch-example.py

import random
import torch


class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we construct three nn.Linear instances that we will
        use in the forward pass.
        """
        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 0, 1, 2, or 3
        and reuse the middle_linear Module that many times to compute hidden layer
        representations.

        Since each forward pass builds a dynamic computation graph, we can use
        normal Python control-flow operators like loops or conditional statements
        when defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same Module many
        times when defining a computational graph. This is a big improvement from
        Lua Torch, where each Module could be used only once.
        """
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
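
# A small sketch added for illustration (not part of the original example):
# reusing middle_linear several times inside forward() does not create extra
# parameters; the module still owns a single weight and bias. The tiny
# dimensions below are arbitrary and chosen only for this check.
_demo = DynamicNet(4, 8, 2)
# 3 Linear layers x (weight, bias) = 6 parameter tensors, no matter how many
# times middle_linear happens to be applied in a given forward pass.
assert len(list(_demo.parameters())) == 6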

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above.
model = DynamicNet(D_in, H, D_out)

# Construct our loss function and an optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute the loss; print it every 100 iterations.
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
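
# A follow-up sketch added for illustration (not part of the original example):
# once training is done, inference does not need gradients, so it can run under
# torch.no_grad(). Because forward() still samples the hidden depth on every
# call, the loss reported here can vary slightly between calls.
with torch.no_grad():
    final_loss = criterion(model(x), y)
    print('final loss:', final_loss.item())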