Created July 3, 2019 21:17
index ede2865..de5eb9f 100755
--- a/examples/maml-omniglot.py
+++ b/examples/maml-omniglot.py
@@ -30,6 +30,7 @@ import higher
 
 from omniglot_loaders import OmniglotNShot
 
+
 def main():
     argparser = argparse.ArgumentParser()
     argparser.add_argument('--n_way', type=int, help='n way', default=5)
@@ -58,23 +59,22 @@ def main():
     # Before higher, models could *not* be created like this
     # and the parameters needed to be manually updated and copied
     # for the updates.
+
     net = nn.Sequential(
-        nn.Conv2d(1, 64, 3, 2),
-        nn.ReLU(inplace=True),
-        nn.BatchNorm2d(64),
-        nn.Conv2d(64, 64, 3, 2),
+        nn.Conv2d(1, 64, 3),
+        nn.BatchNorm2d(64, momentum=1, affine=True),
         nn.ReLU(inplace=True),
-        nn.BatchNorm2d(64),
-        nn.Conv2d(64, 64, 3, 2),
+        nn.MaxPool2d(2, 2),
+        nn.Conv2d(64, 64, 3),
+        nn.BatchNorm2d(64, momentum=1, affine=True),
         nn.ReLU(inplace=True),
-        nn.BatchNorm2d(64),
-        nn.Conv2d(64, 64, 2, 1),
+        nn.MaxPool2d(2, 2),
+        nn.Conv2d(64, 64, 3),
+        nn.BatchNorm2d(64, momentum=1, affine=True),
         nn.ReLU(inplace=True),
-        nn.BatchNorm2d(64),
+        nn.MaxPool2d(2,2),
         Flatten(),
-        nn.Linear(64, args.n_way)
-    ).to(device)
-
+        nn.Linear(64, args.n_way)).to(device)
     # We will use Adam to (meta-)optimize the initial parameters
     # to be adapted.
     meta_opt = optim.Adam(net.parameters(), lr=1e-3)
@@ -90,6 +90,7 @@ def train(db, net, device, meta_opt, epoch, log):
 
     net.train()
     n_train_iter = db.x_train.shape[0] // db.batchsz
+
     for batch_idx in range(n_train_iter):
         # Sample a batch of support and query images and labels.
         x_spt, y_spt, x_qry, y_qry = db.next()
@@ -107,7 +108,7 @@ def train(db, net, device, meta_opt, epoch, log):
         # Initialize the inner optimizer to adapt the parameters to
         # the support set.
         n_inner_iter = 1
-        inner_opt = torch.optim.SGD(net.parameters(), lr=4e-1)
+        inner_opt = torch.optim.SGD(net.parameters(), lr=1e-1)
 
         qry_losses = []
         qry_accs = []
@@ -167,6 +168,7 @@ def test(db, net, device, epoch, log):
 
     qry_losses = []
     qry_accs = []
+
     for batch_idx in range(n_test_iter):
         x_spt, y_spt, x_qry, y_qry = db.next('test')
 
@@ -180,8 +182,8 @@ def test(db, net, device, epoch, log):
 
         # TODO: Maybe pull this out into a separate module so it
         # doesn't have to be duplicated between `train` and `test`?
-        n_inner_iter = 3
-        inner_opt = torch.optim.SGD(net.parameters(), lr=4e-1)
+        n_inner_iter = 5
+        inner_opt = torch.optim.SGD(net.parameters(), lr=1e-1)
 
         for i in range(task_num):
             with higher.innerloop_ctx(net, inner_opt) as (fnet, diffopt):
@@ -195,7 +197,7 @@ def test(db, net, device, epoch, log):
 
                 # The query loss and acc induced by these parameters.
                 qry_logits = fnet(x_qry[i]).detach()
-                qry_loss = F.cross_entropy(qry_logits, y_qry[i], reduction='none')
+                qry_loss = F.cross_entropy(qry_logits, y_qry[i], reduction='none').detach()
                 qry_losses.append(qry_loss)
                 qry_accs.append(
                     qry_logits.argmax(dim=1) == y_qry[i]
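
A note on the architecture hunk: the diff replaces the strided convolutions with conv / batch-norm / ReLU / 2x2 max-pool blocks, and the classifier head still works because the spatial dimensions collapse to 1x1. Below is a self-contained shape check, not part of the gist: nn.Flatten stands in for the example's custom Flatten module, and n_way=5 matches the script's default. With the 28x28 Omniglot inputs the example uses, each block maps the spatial size 28 -> 13 -> 5 -> 1, so Flatten yields exactly the 64 features nn.Linear(64, n_way) expects.

# Shape-check sketch (not from the gist); nn.Flatten() stands in for
# the example's custom Flatten, n_way=5 is the script's default.
import torch
import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(1, 64, 3),
    nn.BatchNorm2d(64, momentum=1, affine=True),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(64, 64, 3),
    nn.BatchNorm2d(64, momentum=1, affine=True),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(64, 64, 3),
    nn.BatchNorm2d(64, momentum=1, affine=True),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(64, 5),
)

# Spatially: 28 -> 26 -> 13 -> 11 -> 5 -> 3 -> 1, leaving 64 features.
print(net(torch.randn(2, 1, 28, 28)).shape)  # torch.Size([2, 5])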
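The remaining hunks retune the inner loop (SGD learning rate 4e-1 -> 1e-1, 5 test-time adaptation steps instead of 3) and detach the query loss at test time so its graph is freed. For readers new to higher, here is a minimal sketch of the inner-loop pattern the example is built on; it is not part of the gist, and the toy linear model and random tensors are stand-ins for the conv net and the Omniglot support/query batches, while lr=1e-1 and the 5 inner steps mirror the diff's values.

# Toy sketch (not from the gist) of higher's innerloop_ctx pattern:
# adapt a differentiable copy of `net` on support data, then backprop
# the query loss through the adaptation to the meta-parameters.
import torch
import torch.nn as nn
import torch.nn.functional as F
import higher

net = nn.Linear(8, 5)  # stand-in for the conv net
x_spt, y_spt = torch.randn(25, 8), torch.randint(0, 5, (25,))
x_qry, y_qry = torch.randn(75, 8), torch.randint(0, 5, (75,))

meta_opt = torch.optim.Adam(net.parameters(), lr=1e-3)
inner_opt = torch.optim.SGD(net.parameters(), lr=1e-1)

meta_opt.zero_grad()
with higher.innerloop_ctx(net, inner_opt) as (fnet, diffopt):
    for _ in range(5):  # n_inner_iter, as in the diff's test()
        # Differentiable update of the functional model's parameters.
        diffopt.step(F.cross_entropy(fnet(x_spt), y_spt))
    qry_loss = F.cross_entropy(fnet(x_qry), y_qry)
    qry_loss.backward()  # gradients flow back to net.parameters()
meta_opt.step()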