|
68 | 68 | " ------------\n", |
69 | 69 | " Input Image: img_shape\n", |
70 | 70 | " Flattened\n", |
71 | | - " Linear MLP(128, 512, 256, 1)\n", |
72 | | - " Relu activation after every layer except last.\n", |
| 71 | + " Linear MLP(1024, 512, 256, 1)\n", |
| 72 | + " Leaky Relu activation after every layer except last.\n", |
73 | 73 | " Sigmoid activation after last layer to normalize in range 0 to 1\n", |
74 | 74 | " \"\"\"\n", |
75 | 75 | " def __init__(self, img_shape):\n", |
|
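For reference, a minimal sketch of the discriminator the updated docstring describes, assuming img_shape flattens to 1024 features (e.g. a 1×32×32 image) and a LeakyReLU slope of 0.2; the actual __init__ body is not shown in this hunk, so treat the layer definitions below as an illustration rather than the notebook's code.

    import torch
    import torch.nn as nn

    class Discriminator(nn.Module):
        def __init__(self, img_shape):
            super().__init__()
            # input features after flattening, e.g. 1*32*32 = 1024 (assumption)
            in_features = int(torch.prod(torch.tensor(img_shape)))
            self.model = nn.Sequential(
                nn.Flatten(),
                nn.Linear(in_features, 512),
                nn.LeakyReLU(0.2),   # Leaky ReLU after every layer except the last
                nn.Linear(512, 256),
                nn.LeakyReLU(0.2),
                nn.Linear(256, 1),
                nn.Sigmoid(),        # squash the realness score into the range 0 to 1
            )

        def forward(self, img):
            return self.model(img)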
122 | 122 | "gen_optim = torch.optim.Adam(generator.parameters(), lr=2e-4)\n", |
123 | 123 | "disc_optim = torch.optim.Adam(discriminator.parameters(), lr=2e-4)\n", |
124 | 124 | "\n", |
125 | | - "# use gpu if possible\n", |
| 125 | + "# .to(device) moves the networks / models to that device, which is either CPU or the GPU depending on what was detected\n", |
| 126 | + "# if moved to GPU, then the networks can make use of the GPU for computations which is much faster!\n", |
126 | 127 | "generator = generator.to(device)\n", |
127 | 128 | "discriminator = discriminator.to(device)" |
128 | 129 | ] |
|
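The diff never shows how device is defined; a common pattern (an assumption here, not necessarily the notebook's exact line) is to pick the GPU when CUDA is available and fall back to the CPU otherwise:

    import torch

    # use the GPU if one was detected, otherwise stay on the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

With this in place, the .to(device) calls on the models and on each batch keep everything on the same device, which PyTorch requires for the forward and backward passes.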
140 | 141 | " print(\"Epoch {}\".format(epoch))\n", |
141 | 142 | " avg_g_loss = 0\n", |
142 | 143 | " avg_d_loss = 0\n", |
| 144 | + " \n", |
| 145 | + " # notebook.tqdm is a nice way of displaying progress on a jupyter or colab notebook while we loop over the data in train_dataloader\n", |
143 | 146 | " pbar = notebook.tqdm(train_dataloader, total=len(train_dataloader))\n", |
144 | 147 | " i = 0\n", |
145 | 148 | " for data in pbar:\n", |
146 | 149 | " i += 1\n", |
147 | 150 | " real_images = data[0].to(device)\n", |
148 | 151 | " ### Train Generator ###\n", |
| 152 | + "\n", |
| 153 | + " # .zero_grad() is important in PyTorch. Don't forget it. If you do, the optimizer won't work.\n", |
149 | 154 | " generator_optim.zero_grad()\n", |
150 | 155 | " \n", |
151 | 156 | " latent_input = torch.randn((len(real_images), 1, *latent_shape)).to(device)\n", |
152 | 157 | " fake_images = generator(latent_input)\n", |
153 | 158 | "\n", |
154 | 159 | " fake_res = discriminator(fake_images)\n", |
155 | 160 | " \n", |
| 161 | + " # we penalize the generator for being unable to make the discrminator predict 1s for generated fake images\n", |
156 | 162 | " generator_loss = adversarial_loss(fake_res, torch.ones_like(fake_res))\n", |
| 163 | + "\n", |
| 164 | + " # .backward() computes gradients for the loss function with respect to anything that is not detached\n", |
157 | 165 | " generator_loss.backward()\n", |
| 166 | + " # .step() uses a optimizer to apply the gradients to the model parameters, updating the model to reduce the loss\n", |
158 | 167 | " generator_optim.step()\n", |
159 | 168 | " \n", |
160 | 169 | " ### Train Discriminator ###\n", |
161 | 170 | " discriminator_optim.zero_grad()\n", |
162 | 171 | " \n", |
163 | 172 | " real_res = discriminator(real_images)\n", |
164 | 173 | "\n", |
| 174 | + " # .detach() removes fake_images variable from gradient computation, meaning our \n", |
| 175 | + " # generator is not going to be updated when we use the optimizer\n", |
165 | 176 | " fake_res = discriminator(fake_images.detach())\n", |
166 | 177 | "\n", |
| 178 | + " # we penalize the discriminator for not predicting 1s for real images\n", |
167 | 179 | " discriminator_real_loss = adversarial_loss(real_res, torch.ones_like(real_res))\n", |
| 180 | + " # we penalize the discriminator for not predicting 0s for generated, fake images\n", |
168 | 181 | " discriminator_fake_loss = adversarial_loss(fake_res, torch.zeros_like(real_res))\n", |
169 | 182 | " \n", |
170 | 183 | " discriminator_loss = (discriminator_real_loss + discriminator_fake_loss) / 2\n", |
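The definition of adversarial_loss is not part of this diff; since the discriminator ends in a sigmoid, binary cross-entropy is the natural fit. The standalone sketch below (an assumption, with illustrative values) shows why the generator targets ones while the discriminator targets zeros for the same fake scores:

    import torch
    import torch.nn as nn

    # assumption: adversarial_loss is binary cross-entropy, matching the sigmoid output
    adversarial_loss = nn.BCELoss()

    fake_res = torch.tensor([[0.9], [0.2]])      # discriminator scores for two fake images
    g_target = torch.ones_like(fake_res)         # the generator wants fakes scored as real (1)
    d_target = torch.zeros_like(fake_res)        # the discriminator wants fakes scored as fake (0)

    print(adversarial_loss(fake_res, g_target))  # small when the generator fools the discriminator
    print(adversarial_loss(fake_res, d_target))  # small when the discriminator catches the fakes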
|
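To back up the .zero_grad() comment above, here is a tiny standalone demo (all names are illustrative) showing that PyTorch accumulates gradients across .backward() calls unless they are cleared:

    import torch

    w = torch.ones(1, requires_grad=True)
    opt = torch.optim.SGD([w], lr=0.1)

    (w * 2).sum().backward()
    print(w.grad)   # tensor([2.])

    # without zero_grad(), the next backward() adds to the existing gradient
    (w * 2).sum().backward()
    print(w.grad)   # tensor([4.])  -- accumulated, not replaced

    opt.zero_grad()
    (w * 2).sum().backward()
    print(w.grad)   # tensor([2.])  -- cleared first, so the gradient is fresh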