ceceshao1 · May 16, 2019 15:44
diff --git a/weight-initialization.ipynb b/weight-initialization.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from comet_ml import Experiment\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torchvision.transforms as transforms\n",
    "import torchvision.datasets as dsets\n",
    "from torch.autograd import Variable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "COMET INFO: Experiment is live on comet.ml https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\n",
      "\n"
     ]
    }
   ],
   "source": [
    "experiment = Experiment(api_key=\"YOUR_API_KEY\",\n",
    "                        project_name=\"YOUR_PROJECT_NAME\", workspace=\"YOUR_WORKSPACE_NAME\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed PyTorch's global RNG so weight initialization and batch shuffling are repeatable.\n",
    "SEED = 19\n",
    "# torch.manual_seed returns a torch.Generator; the name is kept in case it is used elsewhere.\n",
    "random_seed = torch.manual_seed(SEED)\n",
    "# Log under a string key (previously the Generator object itself was passed as the key).\n",
    "experiment.log_other(\"random_seed\", SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scheduler import\n",
    "from torch.optim.lr_scheduler import StepLR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "        <iframe\n",
       "            width=\"100%\"\n",
       "            height=\"800px\"\n",
       "            src=\"https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.lib.display.IFrame at 0x104e87f60>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "experiment.display()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loading Dataset "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_dataset = dsets.MNIST(root='./data', \n",
    "                            train=True, \n",
    "                            transform=transforms.ToTensor(),\n",
    "                            download=True)\n",
    "\n",
    "test_dataset = dsets.MNIST(root='./data', \n",
    "                           train=False, \n",
    "                           transform=transforms.ToTensor())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "experiment.log_dataset_hash(train_dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Make Dataset Iterable (using data loaders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training hyperparameters\n",
    "batch_size = 100\n",
    "n_iters = 3000\n",
    "learning_rate = 0.1\n",
    "\n",
    "# Convert the iteration budget into whole epochs (any fractional part is truncated).\n",
    "num_epochs = int(n_iters / (len(train_dataset) / batch_size))\n",
    "\n",
    "# Record the hyperparameters on the Comet experiment.\n",
    "params = dict(\n",
    "    batch_size=batch_size,\n",
    "    n_iters=n_iters,\n",
    "    num_epochs=num_epochs,\n",
    "    learning_rate=learning_rate,\n",
    ")\n",
    "experiment.log_parameters(params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader = torch.utils.data.DataLoader(dataset=train_dataset, \n",
    "                                           batch_size=batch_size, \n",
    "                                           shuffle=True)\n",
    "\n",
    "test_loader = torch.utils.data.DataLoader(dataset=test_dataset, \n",
    "                                          batch_size=batch_size, \n",
    "                                          shuffle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create the Model Class \n",
    "\n",
    "> Note: Depending on whether you'd like to use the tanh activation or ReLU activation, you should only run one of the following two cells\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### OPTION 1:\n",
    "Using tanh activation -```self.tanh = nn.Tanh()``` and:\n",
    "- normal weight initialization\n",
    "- lecun weight initialization\n",
    "- xavier weight initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FeedforwardNeuralNetModel(nn.Module):\n",
    "    \"\"\"One-hidden-layer feed-forward classifier with a tanh non-linearity.\n",
    "\n",
    "    Keep exactly one weight-initialization scheme active at a time:\n",
    "      * xavier (active): the nn.init.xavier_normal_ calls\n",
    "      * normal: swap in the commented nn.init.normal_ calls instead\n",
    "      * lecun: comment out every nn.init call so nn.Linear keeps its default weights\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
    "        super().__init__()\n",
    "\n",
    "        # Hidden layer, Y = WX + B\n",
    "        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
    "        # nn.init.normal_(self.fc1.weight, mean=0, std=1)  # normal initialization\n",
    "        nn.init.xavier_normal_(self.fc1.weight)  # xavier initialization\n",
    "\n",
    "        # Non-linearity\n",
    "        self.tanh = nn.Tanh()\n",
    "\n",
    "        # Readout layer, Y = WX + B\n",
    "        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
    "        # nn.init.normal_(self.fc2.weight, mean=0, std=1)  # normal initialization\n",
    "        nn.init.xavier_normal_(self.fc2.weight)  # xavier initialization\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Apply fc1 -> tanh -> fc2 to x and return the result.\"\"\"\n",
    "        hidden = self.tanh(self.fc1(x))\n",
    "        return self.fc2(hidden)\n",
    "\n",
    "\n",
    "# Keep this tag in sync with the initialization scheme enabled in the class above.\n",
    "# experiment.log_other(\"initialization\", \"normal\")\n",
    "# experiment.log_other(\"initialization\", \"lecun\")\n",
    "experiment.log_other(\"initialization\", \"xavier\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### OPTION 2:\n",
    "Using ReLU activation -```self.relu = nn.ReLU()``` and:\n",
    "- xavier weight initialization\n",
    "- he weight initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FeedforwardNeuralNetModel(nn.Module):\n",
    "    \"\"\"One-hidden-layer feed-forward classifier with a ReLU non-linearity.\n",
    "\n",
    "    Keep exactly one weight-initialization scheme active at a time:\n",
    "      * xavier (active): the nn.init.xavier_normal_ calls\n",
    "      * he: swap in the commented nn.init.kaiming_normal_ calls instead\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
    "        super().__init__()\n",
    "\n",
    "        # Hidden layer, Y = WX + B\n",
    "        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
    "        # nn.init.kaiming_normal_(self.fc1.weight)  # he initialization\n",
    "        nn.init.xavier_normal_(self.fc1.weight)  # xavier initialization\n",
    "\n",
    "        # Non-linearity\n",
    "        self.relu = nn.ReLU()\n",
    "\n",
    "        # Readout layer, Y = WX + B\n",
    "        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
    "        # nn.init.kaiming_normal_(self.fc2.weight)  # he initialization\n",
    "        nn.init.xavier_normal_(self.fc2.weight)  # xavier initialization\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Apply fc1 -> ReLU -> fc2 to x and return the result.\"\"\"\n",
    "        hidden = self.relu(self.fc1(x))\n",
    "        return self.fc2(hidden)\n",
    "\n",
    "\n",
    "# Keep this tag in sync with the initialization scheme enabled in the class above.\n",
    "# experiment.log_other(\"initialization\", \"he\")\n",
    "experiment.log_other(\"initialization\", \"xavier\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate Model Class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_dim = 28*28\n",
    "hidden_dim = 100\n",
    "output_dim = 10\n",
    "\n",
    "model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define Loss Class "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "criterion = nn.CrossEntropyLoss()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define Optimizer Class "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate Step Learning Scheduler Class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# step_size: after how many scheduler steps (one per epoch in the training loop) the rate decays\n",
    "#   step_size = 1 -> after every 1 epoch,  new_lr = lr * gamma\n",
    "#   step_size = 2 -> after every 2 epochs, new_lr = lr * gamma\n",
    "# gamma: multiplicative decay factor\n",
    "gamma = 0.96\n",
    "\n",
    "scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n",
    "# Log the same value the scheduler was built with, so the two can never drift apart.\n",
    "experiment.log_parameter(\"gamma\", gamma)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train the Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0 LR: [0.1]\n",
      "Iteration: 500. Loss: 0.18761786818504333. Accuracy: 95.26\n",
      "Epoch: 1 LR: [0.096]\n",
      "Iteration: 1000. Loss: 0.07996946573257446. Accuracy: 96.56\n",
      "Epoch: 2 LR: [0.09216]\n",
      "Iteration: 1500. Loss: 0.03148220479488373. Accuracy: 96.58\n",
      "Epoch: 3 LR: [0.08847359999999999]\n",
      "Iteration: 2000. Loss: 0.09320910274982452. Accuracy: 97.11\n",
      "Epoch: 4 LR: [0.084934656]\n",
      "Iteration: 2500. Loss: 0.03602292016148567. Accuracy: 97.51\n",
      "Iteration: 3000. Loss: 0.010576700791716576. Accuracy: 97.36\n"
     ]
    }
   ],
   "source": [
    "# Train for num_epochs epochs; every 500 iterations, measure accuracy on the test set.\n",
    "# NOTE(review): scheduler.step() runs at the top of each epoch, before any optimizer.step().\n",
    "# Newer PyTorch releases expect it after the epoch's optimizer updates; confirm against the\n",
    "# installed version before moving it, because that changes which rate each epoch trains with.\n",
    "iteration = 0  # global step counter (named so it does not shadow the builtin iter)\n",
    "for epoch in range(num_epochs):\n",
    "    # Decay Learning Rate\n",
    "    scheduler.step()\n",
    "    # Print Learning Rate\n",
    "    print('Epoch:', epoch,'LR:', scheduler.get_lr())\n",
    "    for images, labels in train_loader:\n",
    "        # Flatten each image into a vector for the fully connected layers\n",
    "        images = images.view(-1, input_dim)\n",
    "\n",
    "        # Clear gradients w.r.t. parameters\n",
    "        optimizer.zero_grad()\n",
    "\n",
    "        # Forward pass to get output/logits\n",
    "        outputs = model(images)\n",
    "\n",
    "        # Calculate Loss: softmax --> cross entropy loss\n",
    "        loss = criterion(outputs, labels)\n",
    "        # Log a plain float; logging the tensor recorded its repr (tensor(..., grad_fn=...))\n",
    "        experiment.log_metric(\"loss\", loss.item())\n",
    "\n",
    "        # Getting gradients w.r.t. parameters\n",
    "        loss.backward()\n",
    "\n",
    "        # Updating parameters\n",
    "        optimizer.step()\n",
    "\n",
    "        iteration += 1\n",
    "\n",
    "        if iteration % 500 == 0:\n",
    "            # Calculate accuracy over the whole test set\n",
    "            correct = 0\n",
    "            total = 0\n",
    "            # no_grad: evaluation needs no autograd graph\n",
    "            with torch.no_grad():\n",
    "                # Separate names so the training batch above is not overwritten\n",
    "                for test_images, test_labels in test_loader:\n",
    "                    # Forward pass only to get logits/output\n",
    "                    test_outputs = model(test_images.view(-1, input_dim))\n",
    "\n",
    "                    # Get predictions from the maximum value\n",
    "                    _, predicted = torch.max(test_outputs, 1)\n",
    "\n",
    "                    # Total number of labels\n",
    "                    total += test_labels.size(0)\n",
    "\n",
    "                    # Total correct predictions, accumulated as a Python int\n",
    "                    correct += (predicted == test_labels).sum().item()\n",
    "\n",
    "            accuracy = 100. * correct / total\n",
    "            experiment.log_metric(\"accuracy\", accuracy)\n",
    "\n",
    "            # Print Loss\n",
    "            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "COMET INFO: ----------------------------\n",
      "COMET INFO: Comet.ml Experiment Summary:\n",
      "COMET INFO:   Data:\n",
      "COMET INFO:     url: https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\n",
      "COMET INFO:   Metrics:\n",
      "COMET INFO:     accuracy: 97.36\n",
      "COMET INFO:         loss: tensor(0.0106, grad_fn=<NllLossBackward>)\n",
      "COMET INFO: Uploading stats to Comet before program termination (may take several seconds)\n"
     ]
    }
   ],
   "source": [
    "# Run this cell once your model has finished training to signal the end of the experiment\n",
    "experiment.end()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from comet_ml import Experiment\n",
	"import torch\n",
	"import torch.nn as nn\n",
	"import torchvision.transforms as transforms\n",
	"import torchvision.datasets as dsets\n",
	"from torch.autograd import Variable"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"COMET INFO: Experiment is live on comet.ml https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\n",
	"\n"
	]
	}
	],
	"source": [
	"experiment = Experiment(api_key=\"YOUR_API_KEY\",\n",
	" project_name=\"YOUR_PROJECT_NAME\", workspace=\"YOUR_WORKSPACE_NAME\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Seed PyTorch's global RNG so weight initialization and batch shuffling are repeatable.\n",
	"SEED = 19\n",
	"# torch.manual_seed returns a torch.Generator; the name is kept in case it is used elsewhere.\n",
	"random_seed = torch.manual_seed(SEED)\n",
	"# Log under a string key (previously the Generator object itself was passed as the key).\n",
	"experiment.log_other(\"random_seed\", SEED)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Scheduler import\n",
	"from torch.optim.lr_scheduler import StepLR"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <iframe\n",
	" width=\"100%\"\n",
	" height=\"800px\"\n",
	" src=\"https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\"\n",
	" frameborder=\"0\"\n",
	" allowfullscreen\n",
	" ></iframe>\n",
	" "
	],
	"text/plain": [
	"<IPython.lib.display.IFrame at 0x104e87f60>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"experiment.display()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Loading Dataset "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"train_dataset = dsets.MNIST(root='./data', \n",
	" train=True, \n",
	" transform=transforms.ToTensor(),\n",
	" download=True)\n",
	"\n",
	"test_dataset = dsets.MNIST(root='./data', \n",
	" train=False, \n",
	" transform=transforms.ToTensor())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"experiment.log_dataset_hash(train_dataset)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Make Dataset Iterable (using data loaders)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Training hyperparameters\n",
	"batch_size = 100\n",
	"n_iters = 3000\n",
	"learning_rate = 0.1\n",
	"\n",
	"# Convert the iteration budget into whole epochs (any fractional part is truncated).\n",
	"num_epochs = int(n_iters / (len(train_dataset) / batch_size))\n",
	"\n",
	"# Record the hyperparameters on the Comet experiment.\n",
	"params = dict(\n",
	"    batch_size=batch_size,\n",
	"    n_iters=n_iters,\n",
	"    num_epochs=num_epochs,\n",
	"    learning_rate=learning_rate,\n",
	")\n",
	"experiment.log_parameters(params)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"train_loader = torch.utils.data.DataLoader(dataset=train_dataset, \n",
	" batch_size=batch_size, \n",
	" shuffle=True)\n",
	"\n",
	"test_loader = torch.utils.data.DataLoader(dataset=test_dataset, \n",
	" batch_size=batch_size, \n",
	" shuffle=False)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Create the Model Class \n",
	"\n",
	"> Note: Depending on whether you'd like to use the tanh activation or ReLU activation, you should only run one of the following two cells\n",
	"\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### OPTION 1:\n",
	"Using tanh activation -```self.tanh = nn.Tanh()``` and:\n",
	"- normal weight initialization\n",
	"- lecun weight initialization\n",
	"- xavier weight initialization"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"class FeedforwardNeuralNetModel(nn.Module):\n",
	"    \"\"\"One-hidden-layer feed-forward classifier with a tanh non-linearity.\n",
	"\n",
	"    Keep exactly one weight-initialization scheme active at a time:\n",
	"      * xavier (active): the nn.init.xavier_normal_ calls\n",
	"      * normal: swap in the commented nn.init.normal_ calls instead\n",
	"      * lecun: comment out every nn.init call so nn.Linear keeps its default weights\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, input_dim, hidden_dim, output_dim):\n",
	"        super().__init__()\n",
	"\n",
	"        # Hidden layer, Y = WX + B\n",
	"        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
	"        # nn.init.normal_(self.fc1.weight, mean=0, std=1)  # normal initialization\n",
	"        nn.init.xavier_normal_(self.fc1.weight)  # xavier initialization\n",
	"\n",
	"        # Non-linearity\n",
	"        self.tanh = nn.Tanh()\n",
	"\n",
	"        # Readout layer, Y = WX + B\n",
	"        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
	"        # nn.init.normal_(self.fc2.weight, mean=0, std=1)  # normal initialization\n",
	"        nn.init.xavier_normal_(self.fc2.weight)  # xavier initialization\n",
	"\n",
	"    def forward(self, x):\n",
	"        \"\"\"Apply fc1 -> tanh -> fc2 to x and return the result.\"\"\"\n",
	"        hidden = self.tanh(self.fc1(x))\n",
	"        return self.fc2(hidden)\n",
	"\n",
	"\n",
	"# Keep this tag in sync with the initialization scheme enabled in the class above.\n",
	"# experiment.log_other(\"initialization\", \"normal\")\n",
	"# experiment.log_other(\"initialization\", \"lecun\")\n",
	"experiment.log_other(\"initialization\", \"xavier\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"#### OPTION 2:\n",
	"Using ReLU activation -```self.relu = nn.ReLU()``` and:\n",
	"- xavier weight initialization\n",
	"- he weight initialization"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"class FeedforwardNeuralNetModel(nn.Module):\n",
	"    \"\"\"One-hidden-layer feed-forward classifier with a ReLU non-linearity.\n",
	"\n",
	"    Keep exactly one weight-initialization scheme active at a time:\n",
	"      * xavier (active): the nn.init.xavier_normal_ calls\n",
	"      * he: swap in the commented nn.init.kaiming_normal_ calls instead\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, input_dim, hidden_dim, output_dim):\n",
	"        super().__init__()\n",
	"\n",
	"        # Hidden layer, Y = WX + B\n",
	"        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
	"        # nn.init.kaiming_normal_(self.fc1.weight)  # he initialization\n",
	"        nn.init.xavier_normal_(self.fc1.weight)  # xavier initialization\n",
	"\n",
	"        # Non-linearity\n",
	"        self.relu = nn.ReLU()\n",
	"\n",
	"        # Readout layer, Y = WX + B\n",
	"        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
	"        # nn.init.kaiming_normal_(self.fc2.weight)  # he initialization\n",
	"        nn.init.xavier_normal_(self.fc2.weight)  # xavier initialization\n",
	"\n",
	"    def forward(self, x):\n",
	"        \"\"\"Apply fc1 -> ReLU -> fc2 to x and return the result.\"\"\"\n",
	"        hidden = self.relu(self.fc1(x))\n",
	"        return self.fc2(hidden)\n",
	"\n",
	"\n",
	"# Keep this tag in sync with the initialization scheme enabled in the class above.\n",
	"# experiment.log_other(\"initialization\", \"he\")\n",
	"experiment.log_other(\"initialization\", \"xavier\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Instantiate Model Class"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"input_dim = 28*28\n",
	"hidden_dim = 100\n",
	"output_dim = 10\n",
	"\n",
	"model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Define Loss Class "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"criterion = nn.CrossEntropyLoss()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Define Optimizer Class "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Instantiate Step Learning Scheduler Class"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"# step_size: after how many scheduler steps (one per epoch in the training loop) the rate decays\n",
	"#   step_size = 1 -> after every 1 epoch,  new_lr = lr * gamma\n",
	"#   step_size = 2 -> after every 2 epochs, new_lr = lr * gamma\n",
	"# gamma: multiplicative decay factor\n",
	"gamma = 0.96\n",
	"\n",
	"scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n",
	"# Log the same value the scheduler was built with, so the two can never drift apart.\n",
	"experiment.log_parameter(\"gamma\", gamma)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Train the Model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Epoch: 0 LR: [0.1]\n",
	"Iteration: 500. Loss: 0.18761786818504333. Accuracy: 95.26\n",
	"Epoch: 1 LR: [0.096]\n",
	"Iteration: 1000. Loss: 0.07996946573257446. Accuracy: 96.56\n",
	"Epoch: 2 LR: [0.09216]\n",
	"Iteration: 1500. Loss: 0.03148220479488373. Accuracy: 96.58\n",
	"Epoch: 3 LR: [0.08847359999999999]\n",
	"Iteration: 2000. Loss: 0.09320910274982452. Accuracy: 97.11\n",
	"Epoch: 4 LR: [0.084934656]\n",
	"Iteration: 2500. Loss: 0.03602292016148567. Accuracy: 97.51\n",
	"Iteration: 3000. Loss: 0.010576700791716576. Accuracy: 97.36\n"
	]
	}
	],
	"source": [
	"# Train for num_epochs epochs; every 500 iterations, measure accuracy on the test set.\n",
	"# NOTE(review): scheduler.step() runs at the top of each epoch, before any optimizer.step().\n",
	"# Newer PyTorch releases expect it after the epoch's optimizer updates; confirm against the\n",
	"# installed version before moving it, because that changes which rate each epoch trains with.\n",
	"iteration = 0  # global step counter (named so it does not shadow the builtin iter)\n",
	"for epoch in range(num_epochs):\n",
	"    # Decay Learning Rate\n",
	"    scheduler.step()\n",
	"    # Print Learning Rate\n",
	"    print('Epoch:', epoch,'LR:', scheduler.get_lr())\n",
	"    for images, labels in train_loader:\n",
	"        # Flatten each image into a vector for the fully connected layers\n",
	"        images = images.view(-1, input_dim)\n",
	"\n",
	"        # Clear gradients w.r.t. parameters\n",
	"        optimizer.zero_grad()\n",
	"\n",
	"        # Forward pass to get output/logits\n",
	"        outputs = model(images)\n",
	"\n",
	"        # Calculate Loss: softmax --> cross entropy loss\n",
	"        loss = criterion(outputs, labels)\n",
	"        # Log a plain float; logging the tensor recorded its repr (tensor(..., grad_fn=...))\n",
	"        experiment.log_metric(\"loss\", loss.item())\n",
	"\n",
	"        # Getting gradients w.r.t. parameters\n",
	"        loss.backward()\n",
	"\n",
	"        # Updating parameters\n",
	"        optimizer.step()\n",
	"\n",
	"        iteration += 1\n",
	"\n",
	"        if iteration % 500 == 0:\n",
	"            # Calculate accuracy over the whole test set\n",
	"            correct = 0\n",
	"            total = 0\n",
	"            # no_grad: evaluation needs no autograd graph\n",
	"            with torch.no_grad():\n",
	"                # Separate names so the training batch above is not overwritten\n",
	"                for test_images, test_labels in test_loader:\n",
	"                    # Forward pass only to get logits/output\n",
	"                    test_outputs = model(test_images.view(-1, input_dim))\n",
	"\n",
	"                    # Get predictions from the maximum value\n",
	"                    _, predicted = torch.max(test_outputs, 1)\n",
	"\n",
	"                    # Total number of labels\n",
	"                    total += test_labels.size(0)\n",
	"\n",
	"                    # Total correct predictions, accumulated as a Python int\n",
	"                    correct += (predicted == test_labels).sum().item()\n",
	"\n",
	"            accuracy = 100. * correct / total\n",
	"            experiment.log_metric(\"accuracy\", accuracy)\n",
	"\n",
	"            # Print Loss\n",
	"            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"COMET INFO: ----------------------------\n",
	"COMET INFO: Comet.ml Experiment Summary:\n",
	"COMET INFO: Data:\n",
	"COMET INFO: url: https://www.comet.ml/ceceshao1/weight-initialization/b8fec6d781a04706b94d85499e09ec95\n",
	"COMET INFO: Metrics:\n",
	"COMET INFO: accuracy: 97.36\n",
	"COMET INFO: loss: tensor(0.0106, grad_fn=<NllLossBackward>)\n",
	"COMET INFO: Uploading stats to Comet before program termination (may take several seconds)\n"
	]
	}
	],
	"source": [
	"# Run this cell once your model has finished training to signal the end of the experiment\n",
	"experiment.end()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}