Last active
April 8, 2025 08:10
-
Star
(115)
You must be signed in to star a gist -
Fork
(6)
You must be signed in to fork a gist
-
-
Save alper111/8233cdb0414b4cb5853f2f730ab95a49 to your computer and use it in GitHub Desktop.
PyTorch implementation of VGG perceptual loss
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torchvision | |
class VGGPerceptualLoss(torch.nn.Module): | |
def __init__(self, resize=True): | |
super(VGGPerceptualLoss, self).__init__() | |
blocks = [] | |
blocks.append(torchvision.models.vgg16(pretrained=True).features[:4].eval()) | |
blocks.append(torchvision.models.vgg16(pretrained=True).features[4:9].eval()) | |
blocks.append(torchvision.models.vgg16(pretrained=True).features[9:16].eval()) | |
blocks.append(torchvision.models.vgg16(pretrained=True).features[16:23].eval()) | |
for bl in blocks: | |
for p in bl.parameters(): | |
p.requires_grad = False | |
self.blocks = torch.nn.ModuleList(blocks) | |
self.transform = torch.nn.functional.interpolate | |
self.resize = resize | |
self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)) | |
self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)) | |
def forward(self, input, target, feature_layers=[0, 1, 2, 3], style_layers=[]): | |
if input.shape[1] != 3: | |
input = input.repeat(1, 3, 1, 1) | |
target = target.repeat(1, 3, 1, 1) | |
input = (input-self.mean) / self.std | |
target = (target-self.mean) / self.std | |
if self.resize: | |
input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False) | |
target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False) | |
loss = 0.0 | |
x = input | |
y = target | |
for i, block in enumerate(self.blocks): | |
x = block(x) | |
y = block(y) | |
if i in feature_layers: | |
loss += torch.nn.functional.l1_loss(x, y) | |
if i in style_layers: | |
act_x = x.reshape(x.shape[0], x.shape[1], -1) | |
act_y = y.reshape(y.shape[0], y.shape[1], -1) | |
gram_x = act_x @ act_x.permute(0, 2, 1) | |
gram_y = act_y @ act_y.permute(0, 2, 1) | |
loss += torch.nn.functional.l1_loss(gram_x, gram_y) | |
return loss |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yes, I believe we don't need to track the gradient for the target when feeding it through VGG.