diff --git a/_downloads/1c22195f47cf0e89ae2e0989dd4be6bb/two_layer_net_optim.py b/_downloads/1c22195f47cf0e89ae2e0989dd4be6bb/two_layer_net_optim.py deleted file mode 100644 index 82b67dcc1b0..00000000000 --- a/_downloads/1c22195f47cf0e89ae2e0989dd4be6bb/two_layer_net_optim.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: optim --------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. - -Rather than manually updating the weights of the model as we have been doing, -we use the optim package to define an Optimizer that will update the weights -for us. The optim package defines many optimization algorithms that are commonly -used for deep learning, including SGD+momentum, RMSProp, Adam, etc. -""" -import torch - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Use the nn package to define our model and loss function. -model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), -) -loss_fn = torch.nn.MSELoss(reduction='sum') - -# Use the optim package to define an Optimizer that will update the weights of -# the model for us. Here we will use Adam; the optim package contains many other -# optimization algorithms. The first argument to the Adam constructor tells the -# optimizer which Tensors it should update. -learning_rate = 1e-4 -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) -for t in range(500): - # Forward pass: compute predicted y by passing x to the model. - y_pred = model(x) - - # Compute and print loss. - loss = loss_fn(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Before the backward pass, use the optimizer object to zero all of the - # gradients for the variables it will update (which are the learnable - # weights of the model). This is because by default, gradients are - # accumulated in buffers( i.e, not overwritten) whenever .backward() - # is called. Checkout docs of torch.autograd.backward for more details. - optimizer.zero_grad() - - # Backward pass: compute gradient of the loss with respect to model - # parameters - loss.backward() - - # Calling the step function on an Optimizer makes an update to its - # parameters - optimizer.step() diff --git a/_downloads/22f070e74b4f293045131e7170efe86d/two_layer_net_custom_function.py b/_downloads/22f070e74b4f293045131e7170efe86d/two_layer_net_custom_function.py deleted file mode 100644 index 2d2a0875669..00000000000 --- a/_downloads/22f070e74b4f293045131e7170efe86d/two_layer_net_custom_function.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Defining New autograd Functions ----------------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Variables, and uses PyTorch autograd to compute gradients. - -In this implementation we implement our own custom autograd function to perform -the ReLU function. 
-""" -import torch - - -class MyReLU(torch.autograd.Function): - """ - We can implement our own custom autograd Functions by subclassing - torch.autograd.Function and implementing the forward and backward passes - which operate on Tensors. - """ - - @staticmethod - def forward(ctx, input): - """ - In the forward pass we receive a Tensor containing the input and return - a Tensor containing the output. ctx is a context object that can be used - to stash information for backward computation. You can cache arbitrary - objects for use in the backward pass using the ctx.save_for_backward method. - """ - ctx.save_for_backward(input) - return input.clamp(min=0) - - @staticmethod - def backward(ctx, grad_output): - """ - In the backward pass we receive a Tensor containing the gradient of the loss - with respect to the output, and we need to compute the gradient of the loss - with respect to the input. - """ - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input < 0] = 0 - return grad_input - - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU -# torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU - -# The above line disables TensorFloat32. This a feature that allows -# networks to run at a much faster speed while sacrificing precision. -# Although TensorFloat32 works well on most real models, for our toy model -# in this tutorial, the sacrificed precision causes convergence issue. -# For more information, see: -# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold input and outputs. -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Create random Tensors for weights. -w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) -w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - -learning_rate = 1e-6 -for t in range(500): - # To apply our Function, we use Function.apply method. We alias this as 'relu'. - relu = MyReLU.apply - - # Forward pass: compute predicted y using operations; we compute - # ReLU using our custom autograd operation. - y_pred = relu(x.mm(w1)).mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum() - if t % 100 == 99: - print(t, loss.item()) - - # Use autograd to compute the backward pass. - loss.backward() - - # Update weights using gradient descent - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() diff --git a/_downloads/39d9c599559353177d1597d787c8e2fd/two_layer_net_nn.py b/_downloads/39d9c599559353177d1597d787c8e2fd/two_layer_net_nn.py deleted file mode 100644 index 0c1925878e8..00000000000 --- a/_downloads/39d9c599559353177d1597d787c8e2fd/two_layer_net_nn.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: nn ------------ - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. 
-PyTorch autograd makes it easy to define computational graphs and take gradients, -but raw autograd can be a bit too low-level for defining complex neural networks; -this is where the nn package can help. The nn package defines a set of Modules, -which you can think of as a neural network layer that has produces output from -input and may have some trainable weights. -""" -import torch - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Use the nn package to define our model as a sequence of layers. nn.Sequential -# is a Module which contains other Modules, and applies them in sequence to -# produce its output. Each Linear Module computes output from input using a -# linear function, and holds internal Tensors for its weight and bias. -model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), -) - -# The nn package also contains definitions of popular loss functions; in this -# case we will use Mean Squared Error (MSE) as our loss function. -loss_fn = torch.nn.MSELoss(reduction='sum') - -learning_rate = 1e-4 -for t in range(500): - # Forward pass: compute predicted y by passing x to the model. Module objects - # override the __call__ operator so you can call them like functions. When - # doing so you pass a Tensor of input data to the Module and it produces - # a Tensor of output data. - y_pred = model(x) - - # Compute and print loss. We pass Tensors containing the predicted and true - # values of y, and the loss function returns a Tensor containing the - # loss. - loss = loss_fn(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Zero the gradients before running the backward pass. - model.zero_grad() - - # Backward pass: compute gradient of the loss with respect to all the learnable - # parameters of the model. Internally, the parameters of each Module are stored - # in Tensors with requires_grad=True, so this call will compute gradients for - # all learnable parameters in the model. - loss.backward() - - # Update the weights using gradient descent. Each parameter is a Tensor, so - # we can access its gradients like we did before. - with torch.no_grad(): - for param in model.parameters(): - param -= learning_rate * param.grad diff --git a/_downloads/42443af84e4770f8d5353f2c6e48f033/two_layer_net_tensor.py b/_downloads/42443af84e4770f8d5353f2c6e48f033/two_layer_net_tensor.py deleted file mode 100644 index 3eacae42702..00000000000 --- a/_downloads/42443af84e4770f8d5353f2c6e48f033/two_layer_net_tensor.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Tensors ----------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation uses PyTorch tensors to manually compute the forward pass, -loss, and backward pass. - -A PyTorch Tensor is basically the same as a numpy array: it does not know -anything about deep learning or computational graphs or gradients, and is just -a generic n-dimensional array to be used for arbitrary numeric computation. - -The biggest difference between a numpy array and a PyTorch Tensor is that -a PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU, -just cast the Tensor to a cuda datatype. 
-""" - -import torch - - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random input and output data -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Randomly initialize weights -w1 = torch.randn(D_in, H, device=device, dtype=dtype) -w2 = torch.randn(H, D_out, device=device, dtype=dtype) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y - h = x.mm(w1) - h_relu = h.clamp(min=0) - y_pred = h_relu.mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum().item() - if t % 100 == 99: - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.t().mm(grad_y_pred) - grad_h_relu = grad_y_pred.mm(w2.t()) - grad_h = grad_h_relu.clone() - grad_h[h < 0] = 0 - grad_w1 = x.t().mm(grad_h) - - # Update weights using gradient descent - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 diff --git a/_downloads/445f984d6e8c379cf0aefd16ef44a4da/two_layer_net_numpy.py b/_downloads/445f984d6e8c379cf0aefd16ef44a4da/two_layer_net_numpy.py deleted file mode 100644 index f003d0f002b..00000000000 --- a/_downloads/445f984d6e8c379cf0aefd16ef44a4da/two_layer_net_numpy.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Warm-up: numpy --------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x using Euclidean error. - -This implementation uses numpy to manually compute the forward pass, loss, and -backward pass. - -A numpy array is a generic n-dimensional array; it does not know anything about -deep learning or gradients or computational graphs, and is just a way to perform -generic numeric computations. -""" -import numpy as np - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. 
-N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random input and output data -x = np.random.randn(N, D_in) -y = np.random.randn(N, D_out) - -# Randomly initialize weights -w1 = np.random.randn(D_in, H) -w2 = np.random.randn(H, D_out) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y - h = x.dot(w1) - h_relu = np.maximum(h, 0) - y_pred = h_relu.dot(w2) - - # Compute and print loss - loss = np.square(y_pred - y).sum() - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.T.dot(grad_y_pred) - grad_h_relu = grad_y_pred.dot(w2.T) - grad_h = grad_h_relu.copy() - grad_h[h < 0] = 0 - grad_w1 = x.T.dot(grad_h) - - # Update weights - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 diff --git a/_downloads/449b1270b788a2ddf451772f6a4e7470/two_layer_net_tensor.ipynb b/_downloads/449b1270b788a2ddf451772f6a4e7470/two_layer_net_tensor.ipynb deleted file mode 100644 index da6e31bbc62..00000000000 --- a/_downloads/449b1270b788a2ddf451772f6a4e7470/two_layer_net_tensor.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Tensors\n----------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation uses PyTorch tensors to manually compute the forward pass,\nloss, and backward pass.\n\nA PyTorch Tensor is basically the same as a numpy array: it does not know\nanything about deep learning or computational graphs or gradients, and is just\na generic n-dimensional array to be used for arbitrary numeric computation.\n\nThe biggest difference between a numpy array and a PyTorch Tensor is that\na PyTorch Tensor can run on either CPU or GPU. 
To run operations on the GPU,\njust cast the Tensor to a cuda datatype.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random input and output data\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Randomly initialize weights\nw1 = torch.randn(D_in, H, device=device, dtype=dtype)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y\n h = x.mm(w1)\n h_relu = h.clamp(min=0)\n y_pred = h_relu.mm(w2)\n\n # Compute and print loss\n loss = (y_pred - y).pow(2).sum().item()\n if t % 100 == 99:\n print(t, loss)\n\n # Backprop to compute gradients of w1 and w2 with respect to loss\n grad_y_pred = 2.0 * (y_pred - y)\n grad_w2 = h_relu.t().mm(grad_y_pred)\n grad_h_relu = grad_y_pred.mm(w2.t())\n grad_h = grad_h_relu.clone()\n grad_h[h < 0] = 0\n grad_w1 = x.t().mm(grad_h)\n\n # Update weights using gradient descent\n w1 -= learning_rate * grad_w1\n w2 -= learning_rate * grad_w2" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/7df1a8d6c18d6c7122649856a1ac16b8/two_layer_net_custom_function.ipynb b/_downloads/7df1a8d6c18d6c7122649856a1ac16b8/two_layer_net_custom_function.ipynb deleted file mode 100644 index b5502d22c59..00000000000 --- a/_downloads/7df1a8d6c18d6c7122649856a1ac16b8/two_layer_net_custom_function.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Defining New autograd Functions\n----------------------------------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation computes the forward pass using operations on PyTorch\nVariables, and uses PyTorch autograd to compute gradients.\n\nIn this implementation we implement our own custom autograd function to perform\nthe ReLU function.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\nclass MyReLU(torch.autograd.Function):\n \"\"\"\n We can implement our own custom autograd Functions by subclassing\n torch.autograd.Function and implementing the forward and backward passes\n which operate on Tensors.\n \"\"\"\n\n @staticmethod\n def forward(ctx, input):\n \"\"\"\n In the forward pass we receive a Tensor containing the input and return\n a Tensor containing the output. 
ctx is a context object that can be used\n to stash information for backward computation. You can cache arbitrary\n objects for use in the backward pass using the ctx.save_for_backward method.\n \"\"\"\n ctx.save_for_backward(input)\n return input.clamp(min=0)\n\n @staticmethod\n def backward(ctx, grad_output):\n \"\"\"\n In the backward pass we receive a Tensor containing the gradient of the loss\n with respect to the output, and we need to compute the gradient of the loss\n with respect to the input.\n \"\"\"\n input, = ctx.saved_tensors\n grad_input = grad_output.clone()\n grad_input[input < 0] = 0\n return grad_input\n\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n# torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU\n\n# The above line disables TensorFloat32. This a feature that allows\n# networks to run at a much faster speed while sacrificing precision.\n# Although TensorFloat32 works well on most real models, for our toy model\n# in this tutorial, the sacrificed precision causes convergence issue.\n# For more information, see:\n# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold input and outputs.\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Create random Tensors for weights.\nw1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # To apply our Function, we use Function.apply method. 
We alias this as 'relu'.\n relu = MyReLU.apply\n\n # Forward pass: compute predicted y using operations; we compute\n # ReLU using our custom autograd operation.\n y_pred = relu(x.mm(w1)).mm(w2)\n\n # Compute and print loss\n loss = (y_pred - y).pow(2).sum()\n if t % 100 == 99:\n print(t, loss.item())\n\n # Use autograd to compute the backward pass.\n loss.backward()\n\n # Update weights using gradient descent\n with torch.no_grad():\n w1 -= learning_rate * w1.grad\n w2 -= learning_rate * w2.grad\n\n # Manually zero the gradients after updating weights\n w1.grad.zero_()\n w2.grad.zero_()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/7f1a8a2cb5a62c19268b29c5a0d7a859/two_layer_net_nn.ipynb b/_downloads/7f1a8a2cb5a62c19268b29c5a0d7a859/two_layer_net_nn.ipynb deleted file mode 100644 index 6abfc0f9ddd..00000000000 --- a/_downloads/7f1a8a2cb5a62c19268b29c5a0d7a859/two_layer_net_nn.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: nn\n-----------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation uses the nn package from PyTorch to build the network.\nPyTorch autograd makes it easy to define computational graphs and take gradients,\nbut raw autograd can be a bit too low-level for defining complex neural networks;\nthis is where the nn package can help. The nn package defines a set of Modules,\nwhich you can think of as a neural network layer that has produces output from\ninput and may have some trainable weights.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Use the nn package to define our model as a sequence of layers. nn.Sequential\n# is a Module which contains other Modules, and applies them in sequence to\n# produce its output. Each Linear Module computes output from input using a\n# linear function, and holds internal Tensors for its weight and bias.\nmodel = torch.nn.Sequential(\n torch.nn.Linear(D_in, H),\n torch.nn.ReLU(),\n torch.nn.Linear(H, D_out),\n)\n\n# The nn package also contains definitions of popular loss functions; in this\n# case we will use Mean Squared Error (MSE) as our loss function.\nloss_fn = torch.nn.MSELoss(reduction='sum')\n\nlearning_rate = 1e-4\nfor t in range(500):\n # Forward pass: compute predicted y by passing x to the model. Module objects\n # override the __call__ operator so you can call them like functions. 
When\n # doing so you pass a Tensor of input data to the Module and it produces\n # a Tensor of output data.\n y_pred = model(x)\n\n # Compute and print loss. We pass Tensors containing the predicted and true\n # values of y, and the loss function returns a Tensor containing the\n # loss.\n loss = loss_fn(y_pred, y)\n if t % 100 == 99:\n print(t, loss.item())\n\n # Zero the gradients before running the backward pass.\n model.zero_grad()\n\n # Backward pass: compute gradient of the loss with respect to all the learnable\n # parameters of the model. Internally, the parameters of each Module are stored\n # in Tensors with requires_grad=True, so this call will compute gradients for\n # all learnable parameters in the model.\n loss.backward()\n\n # Update the weights using gradient descent. Each parameter is a Tensor, so\n # we can access its gradients like we did before.\n with torch.no_grad():\n for param in model.parameters():\n param -= learning_rate * param.grad" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/843ace305951b2a897f80a52e1189938/two_layer_net_module.ipynb b/_downloads/843ace305951b2a897f80a52e1189938/two_layer_net_module.ipynb deleted file mode 100644 index 995ff4b376d..00000000000 --- a/_downloads/843ace305951b2a897f80a52e1189938/two_layer_net_module.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Custom nn Modules\n--------------------------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation defines the model as a custom Module subclass. Whenever you\nwant a model more complex than a simple sequence of existing Modules you will\nneed to define your model this way.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\nclass TwoLayerNet(torch.nn.Module):\n def __init__(self, D_in, H, D_out):\n \"\"\"\n In the constructor we instantiate two nn.Linear modules and assign them as\n member variables.\n \"\"\"\n super(TwoLayerNet, self).__init__()\n self.linear1 = torch.nn.Linear(D_in, H)\n self.linear2 = torch.nn.Linear(H, D_out)\n\n def forward(self, x):\n \"\"\"\n In the forward function we accept a Tensor of input data and we must return\n a Tensor of output data. 
We can use Modules defined in the constructor as\n well as arbitrary operators on Tensors.\n \"\"\"\n h_relu = self.linear1(x).clamp(min=0)\n y_pred = self.linear2(h_relu)\n return y_pred\n\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Construct our model by instantiating the class defined above\nmodel = TwoLayerNet(D_in, H, D_out)\n\n# Construct our loss function and an Optimizer. The call to model.parameters()\n# in the SGD constructor will contain the learnable parameters of the two\n# nn.Linear modules which are members of the model.\ncriterion = torch.nn.MSELoss(reduction='sum')\noptimizer = torch.optim.SGD(model.parameters(), lr=1e-4)\nfor t in range(500):\n # Forward pass: Compute predicted y by passing x to the model\n y_pred = model(x)\n\n # Compute and print loss\n loss = criterion(y_pred, y)\n if t % 100 == 99:\n print(t, loss.item())\n\n # Zero gradients, perform a backward pass, and update the weights.\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/a48ba4d15c30996aeba51337fb0c8dd7/two_layer_net_optim.ipynb b/_downloads/a48ba4d15c30996aeba51337fb0c8dd7/two_layer_net_optim.ipynb deleted file mode 100644 index 202d7a49812..00000000000 --- a/_downloads/a48ba4d15c30996aeba51337fb0c8dd7/two_layer_net_optim.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: optim\n--------------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation uses the nn package from PyTorch to build the network.\n\nRather than manually updating the weights of the model as we have been doing,\nwe use the optim package to define an Optimizer that will update the weights\nfor us. The optim package defines many optimization algorithms that are commonly\nused for deep learning, including SGD+momentum, RMSProp, Adam, etc.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Use the nn package to define our model and loss function.\nmodel = torch.nn.Sequential(\n torch.nn.Linear(D_in, H),\n torch.nn.ReLU(),\n torch.nn.Linear(H, D_out),\n)\nloss_fn = torch.nn.MSELoss(reduction='sum')\n\n# Use the optim package to define an Optimizer that will update the weights of\n# the model for us. 
Here we will use Adam; the optim package contains many other\n# optimization algorithms. The first argument to the Adam constructor tells the\n# optimizer which Tensors it should update.\nlearning_rate = 1e-4\noptimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\nfor t in range(500):\n # Forward pass: compute predicted y by passing x to the model.\n y_pred = model(x)\n\n # Compute and print loss.\n loss = loss_fn(y_pred, y)\n if t % 100 == 99:\n print(t, loss.item())\n\n # Before the backward pass, use the optimizer object to zero all of the\n # gradients for the variables it will update (which are the learnable\n # weights of the model). This is because by default, gradients are\n # accumulated in buffers( i.e, not overwritten) whenever .backward()\n # is called. Checkout docs of torch.autograd.backward for more details.\n optimizer.zero_grad()\n\n # Backward pass: compute gradient of the loss with respect to model\n # parameters\n loss.backward()\n\n # Calling the step function on an Optimizer makes an update to its\n # parameters\n optimizer.step()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/b8ed57e2c40a637d92a891b4cf7e72f6/two_layer_net_autograd.ipynb b/_downloads/b8ed57e2c40a637d92a891b4cf7e72f6/two_layer_net_autograd.ipynb deleted file mode 100644 index 54cef9b61fa..00000000000 --- a/_downloads/b8ed57e2c40a637d92a891b4cf7e72f6/two_layer_net_autograd.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Tensors and autograd\n-------------------------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation computes the forward pass using operations on PyTorch\nTensors, and uses PyTorch autograd to compute gradients.\n\n\nA PyTorch Tensor represents a node in a computational graph. If ``x`` is a\nTensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor\nholding the gradient of ``x`` with respect to some scalar value.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n# torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU\n\n# The above line disables TensorFloat32. 
This a feature that allows\n# networks to run at a much faster speed while sacrificing precision.\n# Although TensorFloat32 works well on most real models, for our toy model\n# in this tutorial, the sacrificed precision causes convergence issue.\n# For more information, see:\n# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold input and outputs.\n# Setting requires_grad=False indicates that we do not need to compute gradients\n# with respect to these Tensors during the backward pass.\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Create random Tensors for weights.\n# Setting requires_grad=True indicates that we want to compute gradients with\n# respect to these Tensors during the backward pass.\nw1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y using operations on Tensors; these\n # are exactly the same operations we used to compute the forward pass using\n # Tensors, but we do not need to keep references to intermediate values since\n # we are not implementing the backward pass by hand.\n y_pred = x.mm(w1).clamp(min=0).mm(w2)\n\n # Compute and print loss using operations on Tensors.\n # Now loss is a Tensor of shape (1,)\n # loss.item() gets the scalar value held in the loss.\n loss = (y_pred - y).pow(2).sum()\n if t % 100 == 99:\n print(t, loss.item())\n\n # Use autograd to compute the backward pass. This call will compute the\n # gradient of loss with respect to all Tensors with requires_grad=True.\n # After this call w1.grad and w2.grad will be Tensors holding the gradient\n # of the loss with respect to w1 and w2 respectively.\n loss.backward()\n\n # Manually update weights using gradient descent. 
Wrap in torch.no_grad()\n # because weights have requires_grad=True, but we don't need to track this\n # in autograd.\n # An alternative way is to operate on weight.data and weight.grad.data.\n # Recall that tensor.data gives a tensor that shares the storage with\n # tensor, but doesn't track history.\n # You can also use torch.optim.SGD to achieve this.\n with torch.no_grad():\n w1 -= learning_rate * w1.grad\n w2 -= learning_rate * w2.grad\n\n # Manually zero the gradients after updating weights\n w1.grad.zero_()\n w2.grad.zero_()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/e11b7a716744fc603445217d61ec787f/two_layer_net_autograd.py b/_downloads/e11b7a716744fc603445217d61ec787f/two_layer_net_autograd.py deleted file mode 100644 index ebbc98b2bb8..00000000000 --- a/_downloads/e11b7a716744fc603445217d61ec787f/two_layer_net_autograd.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Tensors and autograd -------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Tensors, and uses PyTorch autograd to compute gradients. - - -A PyTorch Tensor represents a node in a computational graph. If ``x`` is a -Tensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor -holding the gradient of ``x`` with respect to some scalar value. -""" -import torch - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU -# torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU - -# The above line disables TensorFloat32. This a feature that allows -# networks to run at a much faster speed while sacrificing precision. -# Although TensorFloat32 works well on most real models, for our toy model -# in this tutorial, the sacrificed precision causes convergence issue. -# For more information, see: -# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold input and outputs. -# Setting requires_grad=False indicates that we do not need to compute gradients -# with respect to these Tensors during the backward pass. -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Create random Tensors for weights. -# Setting requires_grad=True indicates that we want to compute gradients with -# respect to these Tensors during the backward pass. 
-w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) -w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y using operations on Tensors; these - # are exactly the same operations we used to compute the forward pass using - # Tensors, but we do not need to keep references to intermediate values since - # we are not implementing the backward pass by hand. - y_pred = x.mm(w1).clamp(min=0).mm(w2) - - # Compute and print loss using operations on Tensors. - # Now loss is a Tensor of shape (1,) - # loss.item() gets the scalar value held in the loss. - loss = (y_pred - y).pow(2).sum() - if t % 100 == 99: - print(t, loss.item()) - - # Use autograd to compute the backward pass. This call will compute the - # gradient of loss with respect to all Tensors with requires_grad=True. - # After this call w1.grad and w2.grad will be Tensors holding the gradient - # of the loss with respect to w1 and w2 respectively. - loss.backward() - - # Manually update weights using gradient descent. Wrap in torch.no_grad() - # because weights have requires_grad=True, but we don't need to track this - # in autograd. - # An alternative way is to operate on weight.data and weight.grad.data. - # Recall that tensor.data gives a tensor that shares the storage with - # tensor, but doesn't track history. - # You can also use torch.optim.SGD to achieve this. - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() diff --git a/_downloads/f00a2a1e490ec62f7c1bcb950318b7f7/two_layer_net_numpy.ipynb b/_downloads/f00a2a1e490ec62f7c1bcb950318b7f7/two_layer_net_numpy.ipynb deleted file mode 100644 index 24d97015502..00000000000 --- a/_downloads/f00a2a1e490ec62f7c1bcb950318b7f7/two_layer_net_numpy.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nWarm-up: numpy\n--------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x using Euclidean error.\n\nThis implementation uses numpy to manually compute the forward pass, loss, and\nbackward pass.\n\nA numpy array is a generic n-dimensional array; it does not know anything about\ndeep learning or gradients or computational graphs, and is just a way to perform\ngeneric numeric computations.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random input and output data\nx = np.random.randn(N, D_in)\ny = np.random.randn(N, D_out)\n\n# Randomly initialize weights\nw1 = np.random.randn(D_in, H)\nw2 = np.random.randn(H, D_out)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y\n h = x.dot(w1)\n h_relu = np.maximum(h, 0)\n y_pred = h_relu.dot(w2)\n\n # Compute and print loss\n loss = np.square(y_pred - y).sum()\n print(t, loss)\n\n # Backprop to compute gradients of w1 and w2 with respect to loss\n grad_y_pred = 2.0 * (y_pred - y)\n grad_w2 = h_relu.T.dot(grad_y_pred)\n grad_h_relu = 
grad_y_pred.dot(w2.T)\n grad_h = grad_h_relu.copy()\n grad_h[h < 0] = 0\n grad_w1 = x.T.dot(grad_h)\n\n # Update weights\n w1 -= learning_rate * grad_w1\n w2 -= learning_rate * grad_w2" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/fa9717a4ee4117e114380fe627b1350c/two_layer_net_module.py b/_downloads/fa9717a4ee4117e114380fe627b1350c/two_layer_net_module.py deleted file mode 100644 index 29d27274d25..00000000000 --- a/_downloads/fa9717a4ee4117e114380fe627b1350c/two_layer_net_module.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Custom nn Modules --------------------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation defines the model as a custom Module subclass. Whenever you -want a model more complex than a simple sequence of existing Modules you will -need to define your model this way. -""" -import torch - - -class TwoLayerNet(torch.nn.Module): - def __init__(self, D_in, H, D_out): - """ - In the constructor we instantiate two nn.Linear modules and assign them as - member variables. - """ - super(TwoLayerNet, self).__init__() - self.linear1 = torch.nn.Linear(D_in, H) - self.linear2 = torch.nn.Linear(H, D_out) - - def forward(self, x): - """ - In the forward function we accept a Tensor of input data and we must return - a Tensor of output data. We can use Modules defined in the constructor as - well as arbitrary operators on Tensors. - """ - h_relu = self.linear1(x).clamp(min=0) - y_pred = self.linear2(h_relu) - return y_pred - - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Construct our model by instantiating the class defined above -model = TwoLayerNet(D_in, H, D_out) - -# Construct our loss function and an Optimizer. The call to model.parameters() -# in the SGD constructor will contain the learnable parameters of the two -# nn.Linear modules which are members of the model. -criterion = torch.nn.MSELoss(reduction='sum') -optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) -for t in range(500): - # Forward pass: Compute predicted y by passing x to the model - y_pred = model(x) - - # Compute and print loss - loss = criterion(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Zero gradients, perform a backward pass, and update the weights. 
- optimizer.zero_grad() - loss.backward() - optimizer.step() diff --git a/_downloads/two_layer_net_autograd.ipynb b/_downloads/two_layer_net_autograd.ipynb deleted file mode 100644 index a776bf7f45b..00000000000 --- a/_downloads/two_layer_net_autograd.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Tensors and autograd\n-------------------------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation computes the forward pass using operations on PyTorch\nTensors, and uses PyTorch autograd to compute gradients.\n\n\nA PyTorch Tensor represents a node in a computational graph. If ``x`` is a\nTensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor\nholding the gradient of ``x`` with respect to some scalar value.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold input and outputs.\n# Setting requires_grad=False indicates that we do not need to compute gradients\n# with respect to these Tensors during the backward pass.\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Create random Tensors for weights.\n# Setting requires_grad=True indicates that we want to compute gradients with\n# respect to these Tensors during the backward pass.\nw1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y using operations on Tensors; these\n # are exactly the same operations we used to compute the forward pass using\n # Tensors, but we do not need to keep references to intermediate values since\n # we are not implementing the backward pass by hand.\n y_pred = x.mm(w1).clamp(min=0).mm(w2)\n\n # Compute and print loss using operations on Tensors.\n # Now loss is a Tensor of shape (1,)\n # loss.item() gets the a scalar value held in the loss.\n loss = (y_pred - y).pow(2).sum()\n print(t, loss.item())\n\n # Use autograd to compute the backward pass. This call will compute the\n # gradient of loss with respect to all Tensors with requires_grad=True.\n # After this call w1.grad and w2.grad will be Tensors holding the gradient\n # of the loss with respect to w1 and w2 respectively.\n loss.backward()\n\n # Manually update weights using gradient descent. 
Wrap in torch.no_grad()\n # because weights have requires_grad=True, but we don't need to track this\n # in autograd.\n # An alternative way is to operate on weight.data and weight.grad.data.\n # Recall that tensor.data gives a tensor that shares the storage with\n # tensor, but doesn't track history.\n # You can also use torch.optim.SGD to achieve this.\n with torch.no_grad():\n w1 -= learning_rate * w1.grad\n w2 -= learning_rate * w2.grad\n\n # Manually zero the gradients after updating weights\n w1.grad.zero_()\n w2.grad.zero_()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_autograd.py b/_downloads/two_layer_net_autograd.py deleted file mode 100644 index beab57a9afb..00000000000 --- a/_downloads/two_layer_net_autograd.py +++ /dev/null @@ -1,72 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Tensors and autograd -------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Tensors, and uses PyTorch autograd to compute gradients. - - -A PyTorch Tensor represents a node in a computational graph. If ``x`` is a -Tensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor -holding the gradient of ``x`` with respect to some scalar value. -""" -import torch - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold input and outputs. -# Setting requires_grad=False indicates that we do not need to compute gradients -# with respect to these Tensors during the backward pass. -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Create random Tensors for weights. -# Setting requires_grad=True indicates that we want to compute gradients with -# respect to these Tensors during the backward pass. -w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) -w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y using operations on Tensors; these - # are exactly the same operations we used to compute the forward pass using - # Tensors, but we do not need to keep references to intermediate values since - # we are not implementing the backward pass by hand. - y_pred = x.mm(w1).clamp(min=0).mm(w2) - - # Compute and print loss using operations on Tensors. - # Now loss is a Tensor of shape (1,) - # loss.item() gets the a scalar value held in the loss. - loss = (y_pred - y).pow(2).sum() - print(t, loss.item()) - - # Use autograd to compute the backward pass. This call will compute the - # gradient of loss with respect to all Tensors with requires_grad=True. 
- # After this call w1.grad and w2.grad will be Tensors holding the gradient - # of the loss with respect to w1 and w2 respectively. - loss.backward() - - # Manually update weights using gradient descent. Wrap in torch.no_grad() - # because weights have requires_grad=True, but we don't need to track this - # in autograd. - # An alternative way is to operate on weight.data and weight.grad.data. - # Recall that tensor.data gives a tensor that shares the storage with - # tensor, but doesn't track history. - # You can also use torch.optim.SGD to achieve this. - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() diff --git a/_downloads/two_layer_net_custom_function.ipynb b/_downloads/two_layer_net_custom_function.ipynb deleted file mode 100644 index 01e761ba7de..00000000000 --- a/_downloads/two_layer_net_custom_function.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Defining New autograd Functions\n----------------------------------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation computes the forward pass using operations on PyTorch\nVariables, and uses PyTorch autograd to compute gradients.\n\nIn this implementation we implement our own custom autograd function to perform\nthe ReLU function.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\nclass MyReLU(torch.autograd.Function):\n \"\"\"\n We can implement our own custom autograd Functions by subclassing\n torch.autograd.Function and implementing the forward and backward passes\n which operate on Tensors.\n \"\"\"\n\n @staticmethod\n def forward(ctx, input):\n \"\"\"\n In the forward pass we receive a Tensor containing the input and return\n a Tensor containing the output. ctx is a context object that can be used\n to stash information for backward computation. 
You can cache arbitrary\n objects for use in the backward pass using the ctx.save_for_backward method.\n \"\"\"\n ctx.save_for_backward(input)\n return input.clamp(min=0)\n\n @staticmethod\n def backward(ctx, grad_output):\n \"\"\"\n In the backward pass we receive a Tensor containing the gradient of the loss\n with respect to the output, and we need to compute the gradient of the loss\n with respect to the input.\n \"\"\"\n input, = ctx.saved_tensors\n grad_input = grad_output.clone()\n grad_input[input < 0] = 0\n return grad_input\n\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold input and outputs.\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Create random Tensors for weights.\nw1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # To apply our Function, we use Function.apply method. We alias this as 'relu'.\n relu = MyReLU.apply\n\n # Forward pass: compute predicted y using operations; we compute\n # ReLU using our custom autograd operation.\n y_pred = relu(x.mm(w1)).mm(w2)\n\n # Compute and print loss\n loss = (y_pred - y).pow(2).sum()\n print(t, loss.item())\n\n # Use autograd to compute the backward pass.\n loss.backward()\n\n # Update weights using gradient descent\n with torch.no_grad():\n w1 -= learning_rate * w1.grad\n w2 -= learning_rate * w2.grad\n\n # Manually zero the gradients after updating weights\n w1.grad.zero_()\n w2.grad.zero_()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_custom_function.py b/_downloads/two_layer_net_custom_function.py deleted file mode 100644 index fc4452c53a0..00000000000 --- a/_downloads/two_layer_net_custom_function.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Defining New autograd Functions ----------------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Variables, and uses PyTorch autograd to compute gradients. - -In this implementation we implement our own custom autograd function to perform -the ReLU function. -""" -import torch - - -class MyReLU(torch.autograd.Function): - """ - We can implement our own custom autograd Functions by subclassing - torch.autograd.Function and implementing the forward and backward passes - which operate on Tensors. - """ - - @staticmethod - def forward(ctx, input): - """ - In the forward pass we receive a Tensor containing the input and return - a Tensor containing the output. ctx is a context object that can be used - to stash information for backward computation. 
You can cache arbitrary - objects for use in the backward pass using the ctx.save_for_backward method. - """ - ctx.save_for_backward(input) - return input.clamp(min=0) - - @staticmethod - def backward(ctx, grad_output): - """ - In the backward pass we receive a Tensor containing the gradient of the loss - with respect to the output, and we need to compute the gradient of the loss - with respect to the input. - """ - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input < 0] = 0 - return grad_input - - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold input and outputs. -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Create random Tensors for weights. -w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) -w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - -learning_rate = 1e-6 -for t in range(500): - # To apply our Function, we use Function.apply method. We alias this as 'relu'. - relu = MyReLU.apply - - # Forward pass: compute predicted y using operations; we compute - # ReLU using our custom autograd operation. - y_pred = relu(x.mm(w1)).mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum() - print(t, loss.item()) - - # Use autograd to compute the backward pass. - loss.backward() - - # Update weights using gradient descent - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() diff --git a/_downloads/two_layer_net_module.ipynb b/_downloads/two_layer_net_module.ipynb deleted file mode 100644 index 4b0d4ac9aff..00000000000 --- a/_downloads/two_layer_net_module.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Custom nn Modules\n--------------------------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation defines the model as a custom Module subclass. Whenever you\nwant a model more complex than a simple sequence of existing Modules you will\nneed to define your model this way.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\nclass TwoLayerNet(torch.nn.Module):\n def __init__(self, D_in, H, D_out):\n \"\"\"\n In the constructor we instantiate two nn.Linear modules and assign them as\n member variables.\n \"\"\"\n super(TwoLayerNet, self).__init__()\n self.linear1 = torch.nn.Linear(D_in, H)\n self.linear2 = torch.nn.Linear(H, D_out)\n\n def forward(self, x):\n \"\"\"\n In the forward function we accept a Tensor of input data and we must return\n a Tensor of output data. 
We can use Modules defined in the constructor as\n well as arbitrary operators on Tensors.\n \"\"\"\n h_relu = self.linear1(x).clamp(min=0)\n y_pred = self.linear2(h_relu)\n return y_pred\n\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Construct our model by instantiating the class defined above\nmodel = TwoLayerNet(D_in, H, D_out)\n\n# Construct our loss function and an Optimizer. The call to model.parameters()\n# in the SGD constructor will contain the learnable parameters of the two\n# nn.Linear modules which are members of the model.\ncriterion = torch.nn.MSELoss(reduction='sum')\noptimizer = torch.optim.SGD(model.parameters(), lr=1e-4)\nfor t in range(500):\n # Forward pass: Compute predicted y by passing x to the model\n y_pred = model(x)\n\n # Compute and print loss\n loss = criterion(y_pred, y)\n print(t, loss.item())\n\n # Zero gradients, perform a backward pass, and update the weights.\n optimizer.zero_grad()\n loss.backward()\n optimizer.step()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_module.py b/_downloads/two_layer_net_module.py deleted file mode 100644 index bf010efc05c..00000000000 --- a/_downloads/two_layer_net_module.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Custom nn Modules --------------------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation defines the model as a custom Module subclass. Whenever you -want a model more complex than a simple sequence of existing Modules you will -need to define your model this way. -""" -import torch - - -class TwoLayerNet(torch.nn.Module): - def __init__(self, D_in, H, D_out): - """ - In the constructor we instantiate two nn.Linear modules and assign them as - member variables. - """ - super(TwoLayerNet, self).__init__() - self.linear1 = torch.nn.Linear(D_in, H) - self.linear2 = torch.nn.Linear(H, D_out) - - def forward(self, x): - """ - In the forward function we accept a Tensor of input data and we must return - a Tensor of output data. We can use Modules defined in the constructor as - well as arbitrary operators on Tensors. - """ - h_relu = self.linear1(x).clamp(min=0) - y_pred = self.linear2(h_relu) - return y_pred - - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Construct our model by instantiating the class defined above -model = TwoLayerNet(D_in, H, D_out) - -# Construct our loss function and an Optimizer. The call to model.parameters() -# in the SGD constructor will contain the learnable parameters of the two -# nn.Linear modules which are members of the model. 
-criterion = torch.nn.MSELoss(reduction='sum') -optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) -for t in range(500): - # Forward pass: Compute predicted y by passing x to the model - y_pred = model(x) - - # Compute and print loss - loss = criterion(y_pred, y) - print(t, loss.item()) - - # Zero gradients, perform a backward pass, and update the weights. - optimizer.zero_grad() - loss.backward() - optimizer.step() diff --git a/_downloads/two_layer_net_nn.ipynb b/_downloads/two_layer_net_nn.ipynb deleted file mode 100644 index c0807c9bfcc..00000000000 --- a/_downloads/two_layer_net_nn.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: nn\n-----------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation uses the nn package from PyTorch to build the network.\nPyTorch autograd makes it easy to define computational graphs and take gradients,\nbut raw autograd can be a bit too low-level for defining complex neural networks;\nthis is where the nn package can help. The nn package defines a set of Modules,\nwhich you can think of as a neural network layer that has produces output from\ninput and may have some trainable weights.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Use the nn package to define our model as a sequence of layers. nn.Sequential\n# is a Module which contains other Modules, and applies them in sequence to\n# produce its output. Each Linear Module computes output from input using a\n# linear function, and holds internal Tensors for its weight and bias.\nmodel = torch.nn.Sequential(\n torch.nn.Linear(D_in, H),\n torch.nn.ReLU(),\n torch.nn.Linear(H, D_out),\n)\n\n# The nn package also contains definitions of popular loss functions; in this\n# case we will use Mean Squared Error (MSE) as our loss function.\nloss_fn = torch.nn.MSELoss(reduction='sum')\n\nlearning_rate = 1e-4\nfor t in range(500):\n # Forward pass: compute predicted y by passing x to the model. Module objects\n # override the __call__ operator so you can call them like functions. When\n # doing so you pass a Tensor of input data to the Module and it produces\n # a Tensor of output data.\n y_pred = model(x)\n\n # Compute and print loss. We pass Tensors containing the predicted and true\n # values of y, and the loss function returns a Tensor containing the\n # loss.\n loss = loss_fn(y_pred, y)\n print(t, loss.item())\n\n # Zero the gradients before running the backward pass.\n model.zero_grad()\n\n # Backward pass: compute gradient of the loss with respect to all the learnable\n # parameters of the model. Internally, the parameters of each Module are stored\n # in Tensors with requires_grad=True, so this call will compute gradients for\n # all learnable parameters in the model.\n loss.backward()\n\n # Update the weights using gradient descent. 
Each parameter is a Tensor, so\n # we can access its gradients like we did before.\n with torch.no_grad():\n for param in model.parameters():\n param -= learning_rate * param.grad" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_nn.py b/_downloads/two_layer_net_nn.py deleted file mode 100644 index 51bf623b01b..00000000000 --- a/_downloads/two_layer_net_nn.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: nn ------------ - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. -PyTorch autograd makes it easy to define computational graphs and take gradients, -but raw autograd can be a bit too low-level for defining complex neural networks; -this is where the nn package can help. The nn package defines a set of Modules, -which you can think of as a neural network layer that has produces output from -input and may have some trainable weights. -""" -import torch - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Use the nn package to define our model as a sequence of layers. nn.Sequential -# is a Module which contains other Modules, and applies them in sequence to -# produce its output. Each Linear Module computes output from input using a -# linear function, and holds internal Tensors for its weight and bias. -model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), -) - -# The nn package also contains definitions of popular loss functions; in this -# case we will use Mean Squared Error (MSE) as our loss function. -loss_fn = torch.nn.MSELoss(reduction='sum') - -learning_rate = 1e-4 -for t in range(500): - # Forward pass: compute predicted y by passing x to the model. Module objects - # override the __call__ operator so you can call them like functions. When - # doing so you pass a Tensor of input data to the Module and it produces - # a Tensor of output data. - y_pred = model(x) - - # Compute and print loss. We pass Tensors containing the predicted and true - # values of y, and the loss function returns a Tensor containing the - # loss. - loss = loss_fn(y_pred, y) - print(t, loss.item()) - - # Zero the gradients before running the backward pass. - model.zero_grad() - - # Backward pass: compute gradient of the loss with respect to all the learnable - # parameters of the model. Internally, the parameters of each Module are stored - # in Tensors with requires_grad=True, so this call will compute gradients for - # all learnable parameters in the model. - loss.backward() - - # Update the weights using gradient descent. Each parameter is a Tensor, so - # we can access its gradients like we did before. 
- with torch.no_grad(): - for param in model.parameters(): - param -= learning_rate * param.grad diff --git a/_downloads/two_layer_net_numpy.ipynb b/_downloads/two_layer_net_numpy.ipynb deleted file mode 100644 index 4b9cdd79bd0..00000000000 --- a/_downloads/two_layer_net_numpy.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nWarm-up: numpy\n--------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x using Euclidean error.\n\nThis implementation uses numpy to manually compute the forward pass, loss, and\nbackward pass.\n\nA numpy array is a generic n-dimensional array; it does not know anything about\ndeep learning or gradients or computational graphs, and is just a way to perform\ngeneric numeric computations.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random input and output data\nx = np.random.randn(N, D_in)\ny = np.random.randn(N, D_out)\n\n# Randomly initialize weights\nw1 = np.random.randn(D_in, H)\nw2 = np.random.randn(H, D_out)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y\n h = x.dot(w1)\n h_relu = np.maximum(h, 0)\n y_pred = h_relu.dot(w2)\n\n # Compute and print loss\n loss = np.square(y_pred - y).sum()\n print(t, loss)\n\n # Backprop to compute gradients of w1 and w2 with respect to loss\n grad_y_pred = 2.0 * (y_pred - y)\n grad_w2 = h_relu.T.dot(grad_y_pred)\n grad_h_relu = grad_y_pred.dot(w2.T)\n grad_h = grad_h_relu.copy()\n grad_h[h < 0] = 0\n grad_w1 = x.T.dot(grad_h)\n\n # Update weights\n w1 -= learning_rate * grad_w1\n w2 -= learning_rate * grad_w2" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_numpy.py b/_downloads/two_layer_net_numpy.py deleted file mode 100644 index f003d0f002b..00000000000 --- a/_downloads/two_layer_net_numpy.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Warm-up: numpy --------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x using Euclidean error. - -This implementation uses numpy to manually compute the forward pass, loss, and -backward pass. - -A numpy array is a generic n-dimensional array; it does not know anything about -deep learning or gradients or computational graphs, and is just a way to perform -generic numeric computations. -""" -import numpy as np - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. 
-N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random input and output data -x = np.random.randn(N, D_in) -y = np.random.randn(N, D_out) - -# Randomly initialize weights -w1 = np.random.randn(D_in, H) -w2 = np.random.randn(H, D_out) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y - h = x.dot(w1) - h_relu = np.maximum(h, 0) - y_pred = h_relu.dot(w2) - - # Compute and print loss - loss = np.square(y_pred - y).sum() - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.T.dot(grad_y_pred) - grad_h_relu = grad_y_pred.dot(w2.T) - grad_h = grad_h_relu.copy() - grad_h[h < 0] = 0 - grad_w1 = x.T.dot(grad_h) - - # Update weights - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 diff --git a/_downloads/two_layer_net_optim.ipynb b/_downloads/two_layer_net_optim.ipynb deleted file mode 100644 index 24f2091e161..00000000000 --- a/_downloads/two_layer_net_optim.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: optim\n--------------\n\nA fully-connected ReLU network with one hidden layer, trained to predict y from x\nby minimizing squared Euclidean distance.\n\nThis implementation uses the nn package from PyTorch to build the network.\n\nRather than manually updating the weights of the model as we have been doing,\nwe use the optim package to define an Optimizer that will update the weights\nfor us. The optim package defines many optimization algorithms that are commonly\nused for deep learning, including SGD+momentum, RMSProp, Adam, etc.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random Tensors to hold inputs and outputs\nx = torch.randn(N, D_in)\ny = torch.randn(N, D_out)\n\n# Use the nn package to define our model and loss function.\nmodel = torch.nn.Sequential(\n torch.nn.Linear(D_in, H),\n torch.nn.ReLU(),\n torch.nn.Linear(H, D_out),\n)\nloss_fn = torch.nn.MSELoss(reduction='sum')\n\n# Use the optim package to define an Optimizer that will update the weights of\n# the model for us. Here we will use Adam; the optim package contains many other\n# optimization algoriths. The first argument to the Adam constructor tells the\n# optimizer which Tensors it should update.\nlearning_rate = 1e-4\noptimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\nfor t in range(500):\n # Forward pass: compute predicted y by passing x to the model.\n y_pred = model(x)\n\n # Compute and print loss.\n loss = loss_fn(y_pred, y)\n print(t, loss.item())\n\n # Before the backward pass, use the optimizer object to zero all of the\n # gradients for the variables it will update (which are the learnable\n # weights of the model). This is because by default, gradients are\n # accumulated in buffers( i.e, not overwritten) whenever .backward()\n # is called. 
Checkout docs of torch.autograd.backward for more details.\n optimizer.zero_grad()\n\n # Backward pass: compute gradient of the loss with respect to model\n # parameters\n loss.backward()\n\n # Calling the step function on an Optimizer makes an update to its\n # parameters\n optimizer.step()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_optim.py b/_downloads/two_layer_net_optim.py deleted file mode 100644 index 62c44cd0911..00000000000 --- a/_downloads/two_layer_net_optim.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: optim --------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. - -Rather than manually updating the weights of the model as we have been doing, -we use the optim package to define an Optimizer that will update the weights -for us. The optim package defines many optimization algorithms that are commonly -used for deep learning, including SGD+momentum, RMSProp, Adam, etc. -""" -import torch - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random Tensors to hold inputs and outputs -x = torch.randn(N, D_in) -y = torch.randn(N, D_out) - -# Use the nn package to define our model and loss function. -model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), -) -loss_fn = torch.nn.MSELoss(reduction='sum') - -# Use the optim package to define an Optimizer that will update the weights of -# the model for us. Here we will use Adam; the optim package contains many other -# optimization algoriths. The first argument to the Adam constructor tells the -# optimizer which Tensors it should update. -learning_rate = 1e-4 -optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) -for t in range(500): - # Forward pass: compute predicted y by passing x to the model. - y_pred = model(x) - - # Compute and print loss. - loss = loss_fn(y_pred, y) - print(t, loss.item()) - - # Before the backward pass, use the optimizer object to zero all of the - # gradients for the variables it will update (which are the learnable - # weights of the model). This is because by default, gradients are - # accumulated in buffers( i.e, not overwritten) whenever .backward() - # is called. Checkout docs of torch.autograd.backward for more details. 
- optimizer.zero_grad() - - # Backward pass: compute gradient of the loss with respect to model - # parameters - loss.backward() - - # Calling the step function on an Optimizer makes an update to its - # parameters - optimizer.step() diff --git a/_downloads/two_layer_net_tensor.ipynb b/_downloads/two_layer_net_tensor.ipynb deleted file mode 100644 index 26020422b15..00000000000 --- a/_downloads/two_layer_net_tensor.ipynb +++ /dev/null @@ -1,54 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nPyTorch: Tensors\n----------------\n\nA fully-connected ReLU network with one hidden layer and no biases, trained to\npredict y from x by minimizing squared Euclidean distance.\n\nThis implementation uses PyTorch tensors to manually compute the forward pass,\nloss, and backward pass.\n\nA PyTorch Tensor is basically the same as a numpy array: it does not know\nanything about deep learning or computational graphs or gradients, and is just\na generic n-dimensional array to be used for arbitrary numeric computation.\n\nThe biggest difference between a numpy array and a PyTorch Tensor is that\na PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU,\njust cast the Tensor to a cuda datatype.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\n\ndtype = torch.float\ndevice = torch.device(\"cpu\")\n# device = torch.device(\"cuda:0\") # Uncomment this to run on GPU\n\n# N is batch size; D_in is input dimension;\n# H is hidden dimension; D_out is output dimension.\nN, D_in, H, D_out = 64, 1000, 100, 10\n\n# Create random input and output data\nx = torch.randn(N, D_in, device=device, dtype=dtype)\ny = torch.randn(N, D_out, device=device, dtype=dtype)\n\n# Randomly initialize weights\nw1 = torch.randn(D_in, H, device=device, dtype=dtype)\nw2 = torch.randn(H, D_out, device=device, dtype=dtype)\n\nlearning_rate = 1e-6\nfor t in range(500):\n # Forward pass: compute predicted y\n h = x.mm(w1)\n h_relu = h.clamp(min=0)\n y_pred = h_relu.mm(w2)\n\n # Compute and print loss\n loss = (y_pred - y).pow(2).sum().item()\n print(t, loss)\n\n # Backprop to compute gradients of w1 and w2 with respect to loss\n grad_y_pred = 2.0 * (y_pred - y)\n grad_w2 = h_relu.t().mm(grad_y_pred)\n grad_h_relu = grad_y_pred.mm(w2.t())\n grad_h = grad_h_relu.clone()\n grad_h[h < 0] = 0\n grad_w1 = x.t().mm(grad_h)\n\n # Update weights using gradient descent\n w1 -= learning_rate * grad_w1\n w2 -= learning_rate * grad_w2" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/_downloads/two_layer_net_tensor.py b/_downloads/two_layer_net_tensor.py deleted file mode 100644 index e3cb4949bbf..00000000000 --- a/_downloads/two_layer_net_tensor.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -""" -PyTorch: Tensors ----------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained 
to -predict y from x by minimizing squared Euclidean distance. - -This implementation uses PyTorch tensors to manually compute the forward pass, -loss, and backward pass. - -A PyTorch Tensor is basically the same as a numpy array: it does not know -anything about deep learning or computational graphs or gradients, and is just -a generic n-dimensional array to be used for arbitrary numeric computation. - -The biggest difference between a numpy array and a PyTorch Tensor is that -a PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU, -just cast the Tensor to a cuda datatype. -""" - -import torch - - -dtype = torch.float -device = torch.device("cpu") -# device = torch.device("cuda:0") # Uncomment this to run on GPU - -# N is batch size; D_in is input dimension; -# H is hidden dimension; D_out is output dimension. -N, D_in, H, D_out = 64, 1000, 100, 10 - -# Create random input and output data -x = torch.randn(N, D_in, device=device, dtype=dtype) -y = torch.randn(N, D_out, device=device, dtype=dtype) - -# Randomly initialize weights -w1 = torch.randn(D_in, H, device=device, dtype=dtype) -w2 = torch.randn(H, D_out, device=device, dtype=dtype) - -learning_rate = 1e-6 -for t in range(500): - # Forward pass: compute predicted y - h = x.mm(w1) - h_relu = h.clamp(min=0) - y_pred = h_relu.mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum().item() - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.t().mm(grad_y_pred) - grad_h_relu = grad_y_pred.mm(w2.t()) - grad_h = grad_h_relu.clone() - grad_h[h < 0] = 0 - grad_w1 = x.t().mm(grad_h) - - # Update weights using gradient descent - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 diff --git a/_images/sphx_glr_two_layer_net_autograd_thumb.png b/_images/sphx_glr_two_layer_net_autograd_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_autograd_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_custom_function_thumb.png b/_images/sphx_glr_two_layer_net_custom_function_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_custom_function_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_module_thumb.png b/_images/sphx_glr_two_layer_net_module_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_module_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_nn_thumb.png b/_images/sphx_glr_two_layer_net_nn_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_nn_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_numpy_thumb.png b/_images/sphx_glr_two_layer_net_numpy_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_numpy_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_optim_thumb.png b/_images/sphx_glr_two_layer_net_optim_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_optim_thumb.png and /dev/null differ diff --git a/_images/sphx_glr_two_layer_net_tensor_thumb.png b/_images/sphx_glr_two_layer_net_tensor_thumb.png deleted file mode 100644 index 233f8e605ef..00000000000 Binary files a/_images/sphx_glr_two_layer_net_tensor_thumb.png and /dev/null 
differ diff --git a/_sources/beginner/examples_autograd/two_layer_net_autograd.rst.txt b/_sources/beginner/examples_autograd/two_layer_net_autograd.rst.txt deleted file mode 100644 index db61d44aa15..00000000000 --- a/_sources/beginner/examples_autograd/two_layer_net_autograd.rst.txt +++ /dev/null @@ -1,124 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_autograd_two_layer_net_autograd.py: - - -PyTorch: Tensors and autograd -------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Tensors, and uses PyTorch autograd to compute gradients. - - -A PyTorch Tensor represents a node in a computational graph. If ``x`` is a -Tensor that has ``x.requires_grad=True`` then ``x.grad`` is another Tensor -holding the gradient of ``x`` with respect to some scalar value. - - -.. code-block:: default - - import torch - - dtype = torch.float - device = torch.device("cpu") - # device = torch.device("cuda:0") # Uncomment this to run on GPU - # torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU - - # The above line disables TensorFloat32. This a feature that allows - # networks to run at a much faster speed while sacrificing precision. - # Although TensorFloat32 works well on most real models, for our toy model - # in this tutorial, the sacrificed precision causes convergence issue. - # For more information, see: - # https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. - N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random Tensors to hold input and outputs. - # Setting requires_grad=False indicates that we do not need to compute gradients - # with respect to these Tensors during the backward pass. - x = torch.randn(N, D_in, device=device, dtype=dtype) - y = torch.randn(N, D_out, device=device, dtype=dtype) - - # Create random Tensors for weights. - # Setting requires_grad=True indicates that we want to compute gradients with - # respect to these Tensors during the backward pass. - w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) - w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - - learning_rate = 1e-6 - for t in range(500): - # Forward pass: compute predicted y using operations on Tensors; these - # are exactly the same operations we used to compute the forward pass using - # Tensors, but we do not need to keep references to intermediate values since - # we are not implementing the backward pass by hand. - y_pred = x.mm(w1).clamp(min=0).mm(w2) - - # Compute and print loss using operations on Tensors. - # Now loss is a Tensor of shape (1,) - # loss.item() gets the scalar value held in the loss. - loss = (y_pred - y).pow(2).sum() - if t % 100 == 99: - print(t, loss.item()) - - # Use autograd to compute the backward pass. This call will compute the - # gradient of loss with respect to all Tensors with requires_grad=True. - # After this call w1.grad and w2.grad will be Tensors holding the gradient - # of the loss with respect to w1 and w2 respectively. - loss.backward() - - # Manually update weights using gradient descent. 
Wrap in torch.no_grad() - # because weights have requires_grad=True, but we don't need to track this - # in autograd. - # An alternative way is to operate on weight.data and weight.grad.data. - # Recall that tensor.data gives a tensor that shares the storage with - # tensor, but doesn't track history. - # You can also use torch.optim.SGD to achieve this. - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_autograd_two_layer_net_autograd.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_autograd.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_autograd.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_autograd/two_layer_net_custom_function.rst.txt b/_sources/beginner/examples_autograd/two_layer_net_custom_function.rst.txt deleted file mode 100644 index 4ddd0580a97..00000000000 --- a/_sources/beginner/examples_autograd/two_layer_net_custom_function.rst.txt +++ /dev/null @@ -1,140 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_autograd_two_layer_net_custom_function.py: - - -PyTorch: Defining New autograd Functions ----------------------------------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation computes the forward pass using operations on PyTorch -Variables, and uses PyTorch autograd to compute gradients. - -In this implementation we implement our own custom autograd function to perform -the ReLU function. - - -.. code-block:: default - - import torch - - - class MyReLU(torch.autograd.Function): - """ - We can implement our own custom autograd Functions by subclassing - torch.autograd.Function and implementing the forward and backward passes - which operate on Tensors. - """ - - @staticmethod - def forward(ctx, input): - """ - In the forward pass we receive a Tensor containing the input and return - a Tensor containing the output. ctx is a context object that can be used - to stash information for backward computation. You can cache arbitrary - objects for use in the backward pass using the ctx.save_for_backward method. - """ - ctx.save_for_backward(input) - return input.clamp(min=0) - - @staticmethod - def backward(ctx, grad_output): - """ - In the backward pass we receive a Tensor containing the gradient of the loss - with respect to the output, and we need to compute the gradient of the loss - with respect to the input. - """ - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input < 0] = 0 - return grad_input - - - dtype = torch.float - device = torch.device("cpu") - # device = torch.device("cuda:0") # Uncomment this to run on GPU - # torch.backends.cuda.matmul.allow_tf32 = False # Uncomment this to run on GPU - - # The above line disables TensorFloat32. 
This a feature that allows - # networks to run at a much faster speed while sacrificing precision. - # Although TensorFloat32 works well on most real models, for our toy model - # in this tutorial, the sacrificed precision causes convergence issue. - # For more information, see: - # https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. - N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random Tensors to hold input and outputs. - x = torch.randn(N, D_in, device=device, dtype=dtype) - y = torch.randn(N, D_out, device=device, dtype=dtype) - - # Create random Tensors for weights. - w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True) - w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True) - - learning_rate = 1e-6 - for t in range(500): - # To apply our Function, we use Function.apply method. We alias this as 'relu'. - relu = MyReLU.apply - - # Forward pass: compute predicted y using operations; we compute - # ReLU using our custom autograd operation. - y_pred = relu(x.mm(w1)).mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum() - if t % 100 == 99: - print(t, loss.item()) - - # Use autograd to compute the backward pass. - loss.backward() - - # Update weights using gradient descent - with torch.no_grad(): - w1 -= learning_rate * w1.grad - w2 -= learning_rate * w2.grad - - # Manually zero the gradients after updating weights - w1.grad.zero_() - w2.grad.zero_() - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_autograd_two_layer_net_custom_function.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_custom_function.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_custom_function.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_nn/two_layer_net_module.rst.txt b/_sources/beginner/examples_nn/two_layer_net_module.rst.txt deleted file mode 100644 index 011ecc94f87..00000000000 --- a/_sources/beginner/examples_nn/two_layer_net_module.rst.txt +++ /dev/null @@ -1,108 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_nn_two_layer_net_module.py: - - -PyTorch: Custom nn Modules --------------------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation defines the model as a custom Module subclass. Whenever you -want a model more complex than a simple sequence of existing Modules you will -need to define your model this way. - - -.. code-block:: default - - import torch - - - class TwoLayerNet(torch.nn.Module): - def __init__(self, D_in, H, D_out): - """ - In the constructor we instantiate two nn.Linear modules and assign them as - member variables. 
- """ - super(TwoLayerNet, self).__init__() - self.linear1 = torch.nn.Linear(D_in, H) - self.linear2 = torch.nn.Linear(H, D_out) - - def forward(self, x): - """ - In the forward function we accept a Tensor of input data and we must return - a Tensor of output data. We can use Modules defined in the constructor as - well as arbitrary operators on Tensors. - """ - h_relu = self.linear1(x).clamp(min=0) - y_pred = self.linear2(h_relu) - return y_pred - - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. - N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random Tensors to hold inputs and outputs - x = torch.randn(N, D_in) - y = torch.randn(N, D_out) - - # Construct our model by instantiating the class defined above - model = TwoLayerNet(D_in, H, D_out) - - # Construct our loss function and an Optimizer. The call to model.parameters() - # in the SGD constructor will contain the learnable parameters of the two - # nn.Linear modules which are members of the model. - criterion = torch.nn.MSELoss(reduction='sum') - optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) - for t in range(500): - # Forward pass: Compute predicted y by passing x to the model - y_pred = model(x) - - # Compute and print loss - loss = criterion(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Zero gradients, perform a backward pass, and update the weights. - optimizer.zero_grad() - loss.backward() - optimizer.step() - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_nn_two_layer_net_module.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_module.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_module.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_nn/two_layer_net_nn.rst.txt b/_sources/beginner/examples_nn/two_layer_net_nn.rst.txt deleted file mode 100644 index 4ea77a90fb1..00000000000 --- a/_sources/beginner/examples_nn/two_layer_net_nn.rst.txt +++ /dev/null @@ -1,111 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_nn_two_layer_net_nn.py: - - -PyTorch: nn ------------ - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. -PyTorch autograd makes it easy to define computational graphs and take gradients, -but raw autograd can be a bit too low-level for defining complex neural networks; -this is where the nn package can help. The nn package defines a set of Modules, -which you can think of as a neural network layer that has produces output from -input and may have some trainable weights. - - -.. code-block:: default - - import torch - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. - N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random Tensors to hold inputs and outputs - x = torch.randn(N, D_in) - y = torch.randn(N, D_out) - - # Use the nn package to define our model as a sequence of layers. 
nn.Sequential - # is a Module which contains other Modules, and applies them in sequence to - # produce its output. Each Linear Module computes output from input using a - # linear function, and holds internal Tensors for its weight and bias. - model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), - ) - - # The nn package also contains definitions of popular loss functions; in this - # case we will use Mean Squared Error (MSE) as our loss function. - loss_fn = torch.nn.MSELoss(reduction='sum') - - learning_rate = 1e-4 - for t in range(500): - # Forward pass: compute predicted y by passing x to the model. Module objects - # override the __call__ operator so you can call them like functions. When - # doing so you pass a Tensor of input data to the Module and it produces - # a Tensor of output data. - y_pred = model(x) - - # Compute and print loss. We pass Tensors containing the predicted and true - # values of y, and the loss function returns a Tensor containing the - # loss. - loss = loss_fn(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Zero the gradients before running the backward pass. - model.zero_grad() - - # Backward pass: compute gradient of the loss with respect to all the learnable - # parameters of the model. Internally, the parameters of each Module are stored - # in Tensors with requires_grad=True, so this call will compute gradients for - # all learnable parameters in the model. - loss.backward() - - # Update the weights using gradient descent. Each parameter is a Tensor, so - # we can access its gradients like we did before. - with torch.no_grad(): - for param in model.parameters(): - param -= learning_rate * param.grad - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_nn_two_layer_net_nn.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_nn.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_nn.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_nn/two_layer_net_optim.rst.txt b/_sources/beginner/examples_nn/two_layer_net_optim.rst.txt deleted file mode 100644 index f446ee22650..00000000000 --- a/_sources/beginner/examples_nn/two_layer_net_optim.rst.txt +++ /dev/null @@ -1,105 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_nn_two_layer_net_optim.py: - - -PyTorch: optim --------------- - -A fully-connected ReLU network with one hidden layer, trained to predict y from x -by minimizing squared Euclidean distance. - -This implementation uses the nn package from PyTorch to build the network. - -Rather than manually updating the weights of the model as we have been doing, -we use the optim package to define an Optimizer that will update the weights -for us. The optim package defines many optimization algorithms that are commonly -used for deep learning, including SGD+momentum, RMSProp, Adam, etc. - - -.. code-block:: default - - import torch - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. 
- N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random Tensors to hold inputs and outputs - x = torch.randn(N, D_in) - y = torch.randn(N, D_out) - - # Use the nn package to define our model and loss function. - model = torch.nn.Sequential( - torch.nn.Linear(D_in, H), - torch.nn.ReLU(), - torch.nn.Linear(H, D_out), - ) - loss_fn = torch.nn.MSELoss(reduction='sum') - - # Use the optim package to define an Optimizer that will update the weights of - # the model for us. Here we will use Adam; the optim package contains many other - # optimization algorithms. The first argument to the Adam constructor tells the - # optimizer which Tensors it should update. - learning_rate = 1e-4 - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) - for t in range(500): - # Forward pass: compute predicted y by passing x to the model. - y_pred = model(x) - - # Compute and print loss. - loss = loss_fn(y_pred, y) - if t % 100 == 99: - print(t, loss.item()) - - # Before the backward pass, use the optimizer object to zero all of the - # gradients for the variables it will update (which are the learnable - # weights of the model). This is because by default, gradients are - # accumulated in buffers( i.e, not overwritten) whenever .backward() - # is called. Checkout docs of torch.autograd.backward for more details. - optimizer.zero_grad() - - # Backward pass: compute gradient of the loss with respect to model - # parameters - loss.backward() - - # Calling the step function on an Optimizer makes an update to its - # parameters - optimizer.step() - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_nn_two_layer_net_optim.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_optim.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_optim.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_tensor/two_layer_net_numpy.rst.txt b/_sources/beginner/examples_tensor/two_layer_net_numpy.rst.txt deleted file mode 100644 index 5e2010d7848..00000000000 --- a/_sources/beginner/examples_tensor/two_layer_net_numpy.rst.txt +++ /dev/null @@ -1,94 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_tensor_two_layer_net_numpy.py: - - -Warm-up: numpy --------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x using Euclidean error. - -This implementation uses numpy to manually compute the forward pass, loss, and -backward pass. - -A numpy array is a generic n-dimensional array; it does not know anything about -deep learning or gradients or computational graphs, and is just a way to perform -generic numeric computations. - - -.. code-block:: default - - import numpy as np - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. 
- N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random input and output data - x = np.random.randn(N, D_in) - y = np.random.randn(N, D_out) - - # Randomly initialize weights - w1 = np.random.randn(D_in, H) - w2 = np.random.randn(H, D_out) - - learning_rate = 1e-6 - for t in range(500): - # Forward pass: compute predicted y - h = x.dot(w1) - h_relu = np.maximum(h, 0) - y_pred = h_relu.dot(w2) - - # Compute and print loss - loss = np.square(y_pred - y).sum() - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.T.dot(grad_y_pred) - grad_h_relu = grad_y_pred.dot(w2.T) - grad_h = grad_h_relu.copy() - grad_h[h < 0] = 0 - grad_w1 = x.T.dot(grad_h) - - # Update weights - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_tensor_two_layer_net_numpy.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_numpy.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_numpy.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/_sources/beginner/examples_tensor/two_layer_net_tensor.rst.txt b/_sources/beginner/examples_tensor/two_layer_net_tensor.rst.txt deleted file mode 100644 index 3d6a79bb0aa..00000000000 --- a/_sources/beginner/examples_tensor/two_layer_net_tensor.rst.txt +++ /dev/null @@ -1,105 +0,0 @@ -.. note:: - :class: sphx-glr-download-link-note - - Click :ref:`here ` to download the full example code -.. rst-class:: sphx-glr-example-title - -.. _sphx_glr_beginner_examples_tensor_two_layer_net_tensor.py: - - -PyTorch: Tensors ----------------- - -A fully-connected ReLU network with one hidden layer and no biases, trained to -predict y from x by minimizing squared Euclidean distance. - -This implementation uses PyTorch tensors to manually compute the forward pass, -loss, and backward pass. - -A PyTorch Tensor is basically the same as a numpy array: it does not know -anything about deep learning or computational graphs or gradients, and is just -a generic n-dimensional array to be used for arbitrary numeric computation. - -The biggest difference between a numpy array and a PyTorch Tensor is that -a PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU, -just cast the Tensor to a cuda datatype. - - -.. code-block:: default - - - import torch - - - dtype = torch.float - device = torch.device("cpu") - # device = torch.device("cuda:0") # Uncomment this to run on GPU - - # N is batch size; D_in is input dimension; - # H is hidden dimension; D_out is output dimension. 
- N, D_in, H, D_out = 64, 1000, 100, 10 - - # Create random input and output data - x = torch.randn(N, D_in, device=device, dtype=dtype) - y = torch.randn(N, D_out, device=device, dtype=dtype) - - # Randomly initialize weights - w1 = torch.randn(D_in, H, device=device, dtype=dtype) - w2 = torch.randn(H, D_out, device=device, dtype=dtype) - - learning_rate = 1e-6 - for t in range(500): - # Forward pass: compute predicted y - h = x.mm(w1) - h_relu = h.clamp(min=0) - y_pred = h_relu.mm(w2) - - # Compute and print loss - loss = (y_pred - y).pow(2).sum().item() - if t % 100 == 99: - print(t, loss) - - # Backprop to compute gradients of w1 and w2 with respect to loss - grad_y_pred = 2.0 * (y_pred - y) - grad_w2 = h_relu.t().mm(grad_y_pred) - grad_h_relu = grad_y_pred.mm(w2.t()) - grad_h = grad_h_relu.clone() - grad_h[h < 0] = 0 - grad_w1 = x.t().mm(grad_h) - - # Update weights using gradient descent - w1 -= learning_rate * grad_w1 - w2 -= learning_rate * grad_w2 - - -.. rst-class:: sphx-glr-timing - - **Total running time of the script:** ( 0 minutes 0.000 seconds) - - -.. _sphx_glr_download_beginner_examples_tensor_two_layer_net_tensor.py: - - -.. only :: html - - .. container:: sphx-glr-footer - :class: sphx-glr-footer-example - - - - .. container:: sphx-glr-download - - :download:`Download Python source code: two_layer_net_tensor.py ` - - - - .. container:: sphx-glr-download - - :download:`Download Jupyter notebook: two_layer_net_tensor.ipynb ` - - -.. only:: html - - .. rst-class:: sphx-glr-signature - - `Gallery generated by Sphinx-Gallery `_ diff --git a/beginner/examples_autograd/two_layer_net_autograd.html b/beginner/examples_autograd/two_layer_net_autograd.html deleted file mode 100644 index ba5da4ddc7c..00000000000 --- a/beginner/examples_autograd/two_layer_net_autograd.html +++ /dev/null @@ -1,618 +0,0 @@ - - - - - - - - -PyTorch: Tensors and autograd — PyTorch Tutorials 1.7.0 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

PyTorch: Tensors and autograd

-

A fully-connected ReLU network with one hidden layer and no biases, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation computes the forward pass using operations on PyTorch Tensors, and uses PyTorch autograd to compute gradients.

-

A PyTorch Tensor represents a node in a computational graph. If x is a Tensor that has x.requires_grad=True, then x.grad is another Tensor holding the gradient of x with respect to some scalar value.

-
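As a quick illustration of the requires_grad / .grad mechanics described above, here is a minimal sketch (not part of the deleted tutorial file):

import torch

# A scalar toy example: build y from x, call backward(), read x.grad.
x = torch.tensor(2.0, requires_grad=True)
y = x ** 2 + 3 * x
y.backward()          # autograd computes dy/dx and stores it in x.grad
print(x.grad)         # tensor(7.) because dy/dx = 2*x + 3 = 7 at x = 2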
import torch
-
-dtype = torch.float
-device = torch.device("cpu")
-# device = torch.device("cuda:0")  # Uncomment this to run on GPU
-# torch.backends.cuda.matmul.allow_tf32 = False  # Uncomment this to run on GPU
-
-# The above line disables TensorFloat32. This is a feature that allows
-# networks to run at a much faster speed while sacrificing precision.
-# Although TensorFloat32 works well on most real models, for our toy model
-# in this tutorial, the sacrificed precision causes convergence issues.
-# For more information, see:
-# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random Tensors to hold input and outputs.
-# Setting requires_grad=False indicates that we do not need to compute gradients
-# with respect to these Tensors during the backward pass.
-x = torch.randn(N, D_in, device=device, dtype=dtype)
-y = torch.randn(N, D_out, device=device, dtype=dtype)
-
-# Create random Tensors for weights.
-# Setting requires_grad=True indicates that we want to compute gradients with
-# respect to these Tensors during the backward pass.
-w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
-w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
-
-learning_rate = 1e-6
-for t in range(500):
-    # Forward pass: compute predicted y using operations on Tensors; these
-    # are exactly the same operations we used to compute the forward pass using
-    # Tensors, but we do not need to keep references to intermediate values since
-    # we are not implementing the backward pass by hand.
-    y_pred = x.mm(w1).clamp(min=0).mm(w2)
-
-    # Compute and print loss using operations on Tensors.
-    # Now loss is a zero-dimensional (scalar) Tensor.
-    # loss.item() gets the scalar value held in the loss.
-    loss = (y_pred - y).pow(2).sum()
-    if t % 100 == 99:
-        print(t, loss.item())
-
-    # Use autograd to compute the backward pass. This call will compute the
-    # gradient of loss with respect to all Tensors with requires_grad=True.
-    # After this call w1.grad and w2.grad will be Tensors holding the gradient
-    # of the loss with respect to w1 and w2 respectively.
-    loss.backward()
-
-    # Manually update weights using gradient descent. Wrap in torch.no_grad()
-    # because weights have requires_grad=True, but we don't need to track this
-    # in autograd.
-    # An alternative way is to operate on weight.data and weight.grad.data.
-    # Recall that tensor.data gives a tensor that shares the storage with
-    # tensor, but doesn't track history.
-    # You can also use torch.optim.SGD to achieve this.
-    with torch.no_grad():
-        w1 -= learning_rate * w1.grad
-        w2 -= learning_rate * w2.grad
-
-        # Manually zero the gradients after updating weights
-        w1.grad.zero_()
-        w2.grad.zero_()
-
-
-
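The comment in the loop above mentions torch.optim.SGD as an alternative to the hand-written update; as a hedged sketch (not part of the deleted sources), the equivalent training loop would look like this:

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

# Plain SGD performs w -= lr * w.grad on step(), matching the manual update.
optimizer = torch.optim.SGD([w1, w2], lr=1e-6)

for t in range(500):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    optimizer.zero_grad()   # replaces the manual w1.grad.zero_() / w2.grad.zero_()
    loss.backward()
    optimizer.step()        # replaces the manual in-place subtraction under no_grad()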

Total running time of the script: ( 0 minutes 0.000 seconds)

- -

Gallery generated by Sphinx-Gallery

\ No newline at end of file diff --git a/beginner/examples_autograd/two_layer_net_autograd.html b/beginner/examples_autograd/two_layer_net_autograd.html new file mode 120000 index 00000000000..b3becdbca4e --- /dev/null +++ b/beginner/examples_autograd/two_layer_net_autograd.html @@ -0,0 +1 @@ +beginner/examples_autograd/polynomial_autograd.html \ No newline at end of file diff --git a/beginner/examples_autograd/two_layer_net_custom_function.html b/beginner/examples_autograd/two_layer_net_custom_function.html deleted file mode 100644 index a9997362691..00000000000 --- a/beginner/examples_autograd/two_layer_net_custom_function.html +++ /dev/null @@ -1,635 +0,0 @@ -PyTorch: Defining New autograd Functions — PyTorch Tutorials 1.7.0 documentation
-
-
-
- -
-
-
- Shortcuts -
-
-
-
- -
-
-
- -
-

PyTorch: Defining New autograd Functions

-

A fully-connected ReLU network with one hidden layer and no biases, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation computes the forward pass using operations on PyTorch Tensors, and uses PyTorch autograd to compute gradients.

-

In this implementation we implement our own custom autograd Function to perform the ReLU nonlinearity.

-
import torch
-
-
-class MyReLU(torch.autograd.Function):
-    """
-    We can implement our own custom autograd Functions by subclassing
-    torch.autograd.Function and implementing the forward and backward passes
-    which operate on Tensors.
-    """
-
-    @staticmethod
-    def forward(ctx, input):
-        """
-        In the forward pass we receive a Tensor containing the input and return
-        a Tensor containing the output. ctx is a context object that can be used
-        to stash information for backward computation. You can cache arbitrary
-        objects for use in the backward pass using the ctx.save_for_backward method.
-        """
-        ctx.save_for_backward(input)
-        return input.clamp(min=0)
-
-    @staticmethod
-    def backward(ctx, grad_output):
-        """
-        In the backward pass we receive a Tensor containing the gradient of the loss
-        with respect to the output, and we need to compute the gradient of the loss
-        with respect to the input.
-        """
-        input, = ctx.saved_tensors
-        grad_input = grad_output.clone()
-        grad_input[input < 0] = 0
-        return grad_input
-
-
-dtype = torch.float
-device = torch.device("cpu")
-# device = torch.device("cuda:0")  # Uncomment this to run on GPU
-# torch.backends.cuda.matmul.allow_tf32 = False  # Uncomment this when running on GPU to disable TF32
-
-# The above line disables TensorFloat32. This is a feature that allows
-# networks to run at a much faster speed while sacrificing precision.
-# Although TensorFloat32 works well on most real models, for our toy model
-# in this tutorial the reduced precision causes convergence issues.
-# For more information, see:
-# https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random Tensors to hold input and outputs.
-x = torch.randn(N, D_in, device=device, dtype=dtype)
-y = torch.randn(N, D_out, device=device, dtype=dtype)
-
-# Create random Tensors for weights.
-w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
-w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)
-
-learning_rate = 1e-6
-for t in range(500):
-    # To apply our Function, we use the Function.apply method. We alias this as 'relu'.
-    relu = MyReLU.apply
-
-    # Forward pass: compute predicted y using operations; we compute
-    # ReLU using our custom autograd operation.
-    y_pred = relu(x.mm(w1)).mm(w2)
-
-    # Compute and print loss
-    loss = (y_pred - y).pow(2).sum()
-    if t % 100 == 99:
-        print(t, loss.item())
-
-    # Use autograd to compute the backward pass.
-    loss.backward()
-
-    # Update weights using gradient descent
-    with torch.no_grad():
-        w1 -= learning_rate * w1.grad
-        w2 -= learning_rate * w2.grad
-
-        # Manually zero the gradients after updating weights
-        w1.grad.zero_()
-        w2.grad.zero_()
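
One way to build confidence in a custom Function's backward is torch.autograd.gradcheck, which compares the analytical gradient against numerical finite differences. A minimal sketch (not part of the original page); it assumes the MyReLU class above is in scope, and the double-precision input is my own choice to satisfy gradcheck's default tolerances.

import torch

test_input = torch.randn(8, 5, dtype=torch.double, requires_grad=True)
# Returns True if MyReLU.backward matches the numerical gradient.
print(torch.autograd.gradcheck(MyReLU.apply, (test_input,), eps=1e-6, atol=1e-4))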
diff --git a/beginner/examples_autograd/two_layer_net_custom_function.html b/beginner/examples_autograd/two_layer_net_custom_function.html
new file mode 120000
index 00000000000..76ad5f084ae
--- /dev/null
+++ b/beginner/examples_autograd/two_layer_net_custom_function.html
@@ -0,0 +1 @@
+beginner/examples_autograd/polynomial_custom_function.html
\ No newline at end of file
diff --git a/beginner/examples_nn/two_layer_net_module.html b/beginner/examples_nn/two_layer_net_module.html
deleted file mode 100644
index 68cb0e0c277..00000000000
--- a/beginner/examples_nn/two_layer_net_module.html
+++ /dev/null
@@ -1,604 +0,0 @@
-PyTorch: Custom nn Modules — PyTorch Tutorials 1.7.0 documentation

PyTorch: Custom nn Modules

-

A fully-connected ReLU network with one hidden layer, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation defines the model as a custom Module subclass. Whenever you want a model more complex than a simple sequence of existing Modules, you will need to define your model this way.

-
import torch
-
-
-class TwoLayerNet(torch.nn.Module):
-    def __init__(self, D_in, H, D_out):
-        """
-        In the constructor we instantiate two nn.Linear modules and assign them as
-        member variables.
-        """
-        super(TwoLayerNet, self).__init__()
-        self.linear1 = torch.nn.Linear(D_in, H)
-        self.linear2 = torch.nn.Linear(H, D_out)
-
-    def forward(self, x):
-        """
-        In the forward function we accept a Tensor of input data and we must return
-        a Tensor of output data. We can use Modules defined in the constructor as
-        well as arbitrary operators on Tensors.
-        """
-        h_relu = self.linear1(x).clamp(min=0)
-        y_pred = self.linear2(h_relu)
-        return y_pred
-
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random Tensors to hold inputs and outputs
-x = torch.randn(N, D_in)
-y = torch.randn(N, D_out)
-
-# Construct our model by instantiating the class defined above
-model = TwoLayerNet(D_in, H, D_out)
-
-# Construct our loss function and an Optimizer. The call to model.parameters()
-# in the SGD constructor returns the learnable parameters of the two
-# nn.Linear modules that are members of the model.
-criterion = torch.nn.MSELoss(reduction='sum')
-optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
-for t in range(500):
-    # Forward pass: Compute predicted y by passing x to the model
-    y_pred = model(x)
-
-    # Compute and print loss
-    loss = criterion(y_pred, y)
-    if t % 100 == 99:
-        print(t, loss.item())
-
-    # Zero gradients, perform a backward pass, and update the weights.
-    optimizer.zero_grad()
-    loss.backward()
-    optimizer.step()
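
A quick aside, not from the original page: assigning linear1 and linear2 as attributes in __init__ is what registers their parameters with the Module, which is why model.parameters() can hand them to the optimizer. A small sketch, assuming the TwoLayerNet class above is in scope:

model = TwoLayerNet(1000, 100, 10)
for name, param in model.named_parameters():
    print(name, tuple(param.shape))
# Prints linear1.weight (100, 1000), linear1.bias (100,),
#        linear2.weight (10, 100), linear2.bias (10,)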
diff --git a/beginner/examples_nn/two_layer_net_module.html b/beginner/examples_nn/two_layer_net_module.html
new file mode 120000
index 00000000000..4cbc2016dcb
--- /dev/null
+++ b/beginner/examples_nn/two_layer_net_module.html
@@ -0,0 +1 @@
+beginner/examples_nn/polynomial_module.html
\ No newline at end of file
diff --git a/beginner/examples_nn/two_layer_net_nn.html b/beginner/examples_nn/two_layer_net_nn.html
deleted file mode 100644
index bb76ff0796f..00000000000
--- a/beginner/examples_nn/two_layer_net_nn.html
+++ /dev/null
@@ -1,607 +0,0 @@
-PyTorch: nn — PyTorch Tutorials 1.7.0 documentation

PyTorch: nn

-

A fully-connected ReLU network with one hidden layer, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation uses the nn package from PyTorch to build the network. PyTorch autograd makes it easy to define computational graphs and take gradients, but raw autograd can be a bit too low-level for defining complex neural networks; this is where the nn package can help. The nn package defines a set of Modules, which you can think of as a neural network layer that produces output from input and may have some trainable weights.

-
import torch
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random Tensors to hold inputs and outputs
-x = torch.randn(N, D_in)
-y = torch.randn(N, D_out)
-
-# Use the nn package to define our model as a sequence of layers. nn.Sequential
-# is a Module which contains other Modules, and applies them in sequence to
-# produce its output. Each Linear Module computes output from input using a
-# linear function, and holds internal Tensors for its weight and bias.
-model = torch.nn.Sequential(
-    torch.nn.Linear(D_in, H),
-    torch.nn.ReLU(),
-    torch.nn.Linear(H, D_out),
-)
-
-# The nn package also contains definitions of popular loss functions; in this
-# case we will use Mean Squared Error (MSE) as our loss function.
-loss_fn = torch.nn.MSELoss(reduction='sum')
-
-learning_rate = 1e-4
-for t in range(500):
-    # Forward pass: compute predicted y by passing x to the model. Module objects
-    # override the __call__ operator so you can call them like functions. When
-    # doing so you pass a Tensor of input data to the Module and it produces
-    # a Tensor of output data.
-    y_pred = model(x)
-
-    # Compute and print loss. We pass Tensors containing the predicted and true
-    # values of y, and the loss function returns a Tensor containing the
-    # loss.
-    loss = loss_fn(y_pred, y)
-    if t % 100 == 99:
-        print(t, loss.item())
-
-    # Zero the gradients before running the backward pass.
-    model.zero_grad()
-
-    # Backward pass: compute gradient of the loss with respect to all the learnable
-    # parameters of the model. Internally, the parameters of each Module are stored
-    # in Tensors with requires_grad=True, so this call will compute gradients for
-    # all learnable parameters in the model.
-    loss.backward()
-
-    # Update the weights using gradient descent. Each parameter is a Tensor, so
-    # we can access its gradients like we did before.
-    with torch.no_grad():
-        for param in model.parameters():
-            param -= learning_rate * param.grad
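
As a side note (not in the original page), nn.Sequential simply applies its children in order, so calling the model is equivalent to chaining the layers by hand. A minimal self-contained sketch with the same layer sizes:

import torch

model = torch.nn.Sequential(
    torch.nn.Linear(1000, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 10),
)
x = torch.randn(64, 1000)

out_seq = model(x)
out_manual = x
for layer in model:              # a Sequential iterates over its sub-Modules in order
    out_manual = layer(out_manual)
print(torch.allclose(out_seq, out_manual))   # True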
diff --git a/beginner/examples_nn/two_layer_net_nn.html b/beginner/examples_nn/two_layer_net_nn.html
new file mode 120000
index 00000000000..a398f26bb6f
--- /dev/null
+++ b/beginner/examples_nn/two_layer_net_nn.html
@@ -0,0 +1 @@
+beginner/examples_nn/polynomial_nn.html
\ No newline at end of file
diff --git a/beginner/examples_nn/two_layer_net_optim.html b/beginner/examples_nn/two_layer_net_optim.html
deleted file mode 100644
index 44836b3045c..00000000000
--- a/beginner/examples_nn/two_layer_net_optim.html
+++ /dev/null
@@ -1,600 +0,0 @@
-PyTorch: optim — PyTorch Tutorials 1.7.0 documentation

PyTorch: optim

-

A fully-connected ReLU network with one hidden layer, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation uses the nn package from PyTorch to build the network.

-

Rather than manually updating the weights of the model as we have been doing, we use the optim package to define an Optimizer that will update the weights for us. The optim package defines many optimization algorithms that are commonly used for deep learning, including SGD+momentum, RMSProp, Adam, etc.

-
import torch
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random Tensors to hold inputs and outputs
-x = torch.randn(N, D_in)
-y = torch.randn(N, D_out)
-
-# Use the nn package to define our model and loss function.
-model = torch.nn.Sequential(
-    torch.nn.Linear(D_in, H),
-    torch.nn.ReLU(),
-    torch.nn.Linear(H, D_out),
-)
-loss_fn = torch.nn.MSELoss(reduction='sum')
-
-# Use the optim package to define an Optimizer that will update the weights of
-# the model for us. Here we will use Adam; the optim package contains many other
-# optimization algorithms. The first argument to the Adam constructor tells the
-# optimizer which Tensors it should update.
-learning_rate = 1e-4
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
-for t in range(500):
-    # Forward pass: compute predicted y by passing x to the model.
-    y_pred = model(x)
-
-    # Compute and print loss.
-    loss = loss_fn(y_pred, y)
-    if t % 100 == 99:
-        print(t, loss.item())
-
-    # Before the backward pass, use the optimizer object to zero all of the
-    # gradients for the Tensors it will update (which are the learnable
-    # weights of the model). This is because, by default, gradients are
-    # accumulated in buffers (i.e., not overwritten) whenever .backward()
-    # is called. Check out the docs of torch.autograd.backward for more details.
-    optimizer.zero_grad()
-
-    # Backward pass: compute gradient of the loss with respect to model
-    # parameters
-    loss.backward()
-
-    # Calling the step function on an Optimizer makes an update to its
-    # parameters
-    optimizer.step()
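
The other algorithms named above drop into the same training loop unchanged; only the constructor call differs. A brief sketch (not part of the original page), assuming the model defined above:

optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)   # SGD + momentum
# optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4)           # RMSProp
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)              # Adam, as used above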
diff --git a/beginner/examples_nn/two_layer_net_optim.html b/beginner/examples_nn/two_layer_net_optim.html
new file mode 120000
index 00000000000..7e7a248b13d
--- /dev/null
+++ b/beginner/examples_nn/two_layer_net_optim.html
@@ -0,0 +1 @@
+beginner/examples_nn/polynomial_optim.html
\ No newline at end of file
diff --git a/beginner/examples_tensor/two_layer_net_numpy.html b/beginner/examples_tensor/two_layer_net_numpy.html
deleted file mode 100644
index 0ff835eea31..00000000000
--- a/beginner/examples_tensor/two_layer_net_numpy.html
+++ /dev/null
@@ -1,589 +0,0 @@
-Warm-up: numpy — PyTorch Tutorials 1.7.0 documentation

Warm-up: numpy

-

A fully-connected ReLU network with one hidden layer and no biases, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation uses numpy to manually compute the forward pass, loss, and backward pass.

-

A numpy array is a generic n-dimensional array; it does not know anything about deep learning or gradients or computational graphs, and is just a way to perform generic numeric computations.

-
import numpy as np
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random input and output data
-x = np.random.randn(N, D_in)
-y = np.random.randn(N, D_out)
-
-# Randomly initialize weights
-w1 = np.random.randn(D_in, H)
-w2 = np.random.randn(H, D_out)
-
-learning_rate = 1e-6
-for t in range(500):
-    # Forward pass: compute predicted y
-    h = x.dot(w1)
-    h_relu = np.maximum(h, 0)
-    y_pred = h_relu.dot(w2)
-
-    # Compute and print loss
-    loss = np.square(y_pred - y).sum()
-    print(t, loss)
-
-    # Backprop to compute gradients of w1 and w2 with respect to loss
-    grad_y_pred = 2.0 * (y_pred - y)
-    grad_w2 = h_relu.T.dot(grad_y_pred)
-    grad_h_relu = grad_y_pred.dot(w2.T)
-    grad_h = grad_h_relu.copy()
-    grad_h[h < 0] = 0
-    grad_w1 = x.T.dot(grad_h)
-
-    # Update weights
-    w1 -= learning_rate * grad_w1
-    w2 -= learning_rate * grad_w2
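
A quick way to sanity-check the hand-derived gradients above is a central finite-difference comparison. A small self-contained sketch (not part of the original page); the tiny shapes are my own choice to keep it fast:

import numpy as np

def loss_fn(x, y, w1, w2):
    return np.square(np.maximum(x.dot(w1), 0).dot(w2) - y).sum()

rng = np.random.default_rng(0)
x, y = rng.standard_normal((4, 6)), rng.standard_normal((4, 3))
w1, w2 = rng.standard_normal((6, 5)), rng.standard_normal((5, 3))

h_relu = np.maximum(x.dot(w1), 0)
grad_w2 = h_relu.T.dot(2.0 * (h_relu.dot(w2) - y))   # analytic gradient, as derived above

eps = 1e-6
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[0, 0] += eps
w2_minus[0, 0] -= eps
numeric = (loss_fn(x, y, w1, w2_plus) - loss_fn(x, y, w1, w2_minus)) / (2 * eps)
print(grad_w2[0, 0], numeric)   # the two values should agree closely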
diff --git a/beginner/examples_tensor/two_layer_net_numpy.html b/beginner/examples_tensor/two_layer_net_numpy.html
new file mode 120000
index 00000000000..1327fa851b4
--- /dev/null
+++ b/beginner/examples_tensor/two_layer_net_numpy.html
@@ -0,0 +1 @@
+beginner/examples_tensor/polynomial_numpy.html
\ No newline at end of file
diff --git a/beginner/examples_tensor/two_layer_net_tensor.html b/beginner/examples_tensor/two_layer_net_tensor.html
deleted file mode 100644
index 03f54270045..00000000000
--- a/beginner/examples_tensor/two_layer_net_tensor.html
+++ /dev/null
@@ -1,598 +0,0 @@
-PyTorch: Tensors — PyTorch Tutorials 1.7.0 documentation


PyTorch: Tensors

-

A fully-connected ReLU network with one hidden layer and no biases, trained to predict y from x by minimizing squared Euclidean distance.

-

This implementation uses PyTorch tensors to manually compute the forward pass, loss, and backward pass.

-

A PyTorch Tensor is basically the same as a numpy array: it does not know anything about deep learning or computational graphs or gradients, and is just a generic n-dimensional array to be used for arbitrary numeric computation.

-

The biggest difference between a numpy array and a PyTorch Tensor is that a PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU, just create the Tensor on (or move it to) a CUDA device.

-
import torch
-
-
-dtype = torch.float
-device = torch.device("cpu")
-# device = torch.device("cuda:0") # Uncomment this to run on GPU
-
-# N is batch size; D_in is input dimension;
-# H is hidden dimension; D_out is output dimension.
-N, D_in, H, D_out = 64, 1000, 100, 10
-
-# Create random input and output data
-x = torch.randn(N, D_in, device=device, dtype=dtype)
-y = torch.randn(N, D_out, device=device, dtype=dtype)
-
-# Randomly initialize weights
-w1 = torch.randn(D_in, H, device=device, dtype=dtype)
-w2 = torch.randn(H, D_out, device=device, dtype=dtype)
-
-learning_rate = 1e-6
-for t in range(500):
-    # Forward pass: compute predicted y
-    h = x.mm(w1)
-    h_relu = h.clamp(min=0)
-    y_pred = h_relu.mm(w2)
-
-    # Compute and print loss
-    loss = (y_pred - y).pow(2).sum().item()
-    if t % 100 == 99:
-        print(t, loss)
-
-    # Backprop to compute gradients of w1 and w2 with respect to loss
-    grad_y_pred = 2.0 * (y_pred - y)
-    grad_w2 = h_relu.t().mm(grad_y_pred)
-    grad_h_relu = grad_y_pred.mm(w2.t())
-    grad_h = grad_h_relu.clone()
-    grad_h[h < 0] = 0
-    grad_w1 = x.t().mm(grad_h)
-
-    # Update weights using gradient descent
-    w1 -= learning_rate * grad_w1
-    w2 -= learning_rate * grad_w2
-
diff --git a/beginner/examples_tensor/two_layer_net_tensor.html b/beginner/examples_tensor/two_layer_net_tensor.html
new file mode 120000
index 00000000000..c063e8c7a2d
--- /dev/null
+++ b/beginner/examples_tensor/two_layer_net_tensor.html
@@ -0,0 +1 @@
+beginner/examples_tensor/polynomial_tensor.html
\ No newline at end of file