#!/usr/bin/env python

### imports (should be covered by requirements.txt)
from torch.autograd import Variable
from torchvision import transforms
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
from tqdm import tqdm
import sys
import os
import matplotlib.pyplot as plt
from PIL import Image

### path definitions
model_path = 'weights/vgg_conv_weights.pth'
image_path = ''  # by default use neural-art as relative dir

### userland testing for multiple instances, a big nono currently
# the shell and grep processes match the pattern too, hence the threshold of 3
n_instances = os.popen('ps aux | grep "python neuralart.py" | wc -l').read()
# TODO: add windows commands for platform compatibility :p for the 3 people who need this warning
if int(n_instances) > 3:
    print("Woah, running 2 or more instances of neural-art at the same time?\n"
          "This is an experimental feature as of now... try it later :3")

### check if there are any weights to use; if not, download the default provided ones
os.makedirs('weights', exist_ok=True)  # 'ls -l' fails outright on a missing dir
if int(os.popen('ls -l weights | wc -l').read()) == 1:  # an empty dir prints just the 'total' line
    os.system(f'curl https://files.catbox.moe/wcao20.pth --output {model_path}')  # TODO: win commands here as well

### Defining neural architecture.
### VGG was trained on ImageNet; although old at this point,
### it still achieves good results.
class VGG(nn.Module):
    def __init__(self, pool='max'):
        super(VGG, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        # POOLING OPTIONS
        if pool == 'max':
            pool_layer = nn.MaxPool2d
        elif pool == 'avg':
            pool_layer = nn.AvgPool2d
        else:
            raise ValueError(f"unknown pool type: {pool}")
        self.pool1 = pool_layer(kernel_size=2, stride=2)
        self.pool2 = pool_layer(kernel_size=2, stride=2)
        self.pool3 = pool_layer(kernel_size=2, stride=2)
        self.pool4 = pool_layer(kernel_size=2, stride=2)
        self.pool5 = pool_layer(kernel_size=2, stride=2)

    def forward(self, x, out_keys):
        out = {}
        out['r11'] = F.relu(self.conv1_1(x))
        out['r12'] = F.relu(self.conv1_2(out['r11']))
        out['p1'] = self.pool1(out['r12'])
        out['r21'] = F.relu(self.conv2_1(out['p1']))
        out['r22'] = F.relu(self.conv2_2(out['r21']))
        out['p2'] = self.pool2(out['r22'])
        out['r31'] = F.relu(self.conv3_1(out['p2']))
        out['r32'] = F.relu(self.conv3_2(out['r31']))
        out['r33'] = F.relu(self.conv3_3(out['r32']))
        out['r34'] = F.relu(self.conv3_4(out['r33']))
        out['p3'] = self.pool3(out['r34'])
        out['r41'] = F.relu(self.conv4_1(out['p3']))
        out['r42'] = F.relu(self.conv4_2(out['r41']))
        out['r43'] = F.relu(self.conv4_3(out['r42']))
        out['r44'] = F.relu(self.conv4_4(out['r43']))
        out['p4'] = self.pool4(out['r44'])
        out['r51'] = F.relu(self.conv5_1(out['p4']))
        out['r52'] = F.relu(self.conv5_2(out['r51']))
        out['r53'] = F.relu(self.conv5_3(out['r52']))
        out['r54'] = F.relu(self.conv5_4(out['r53']))
        out['p5'] = self.pool5(out['r54'])
        # RETURN DESIRED ACTIVATIONS
        return [out[key] for key in out_keys]
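### Quick sanity check (sketch): forward() returns whichever activations you
### name in out_keys. The shapes below assume a random 64x64 input and an
### unloaded (randomly initialized) VGG -- illustrative only, uncomment to try.
# _demo = VGG()
# _r11, _p5 = _demo(torch.randn(1, 3, 64, 64), ['r11', 'p5'])
# print(_r11.shape, _p5.shape)  # torch.Size([1, 64, 64, 64]) torch.Size([1, 512, 2, 2])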
### COMPUTING GRAM MATRIX AND GRAM MATRIX LOSS
# GRAM MATRICES ARE USED TO MEASURE STYLE LOSS
class GramMatrix(nn.Module):
    def forward(self, input):
        b, c, h, w = input.size()
        feats = input.view(b, c, h * w)
        # COMPUTES GRAM MATRIX BY MULTIPLYING INPUT BY TRANSPOSE OF ITSELF
        G = torch.bmm(feats, feats.transpose(1, 2))
        G.div_(h * w)
        return G

class GramMSELoss(nn.Module):
    def forward(self, input, target):
        return nn.MSELoss()(GramMatrix()(input), target)

### IMAGE PROCESSING
# "based" on how much vram you have,
# you can either set this to 1080
# as in 1080p or do what i did:
# set the resolution to 720p, and cry.
img_size = 720

# PRE-PROCESSING
prep = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # CONVERT TO BGR FOR VGG
    transforms.Normalize(mean=[0.40760392, 0.45795686, 0.48501961],  # SUBTRACT IMAGENET MEAN
                         std=[1, 1, 1]),
    transforms.Lambda(lambda x: x.mul_(255)),  # VGG WAS TRAINED WITH PIXEL VALUES 0-255
])

# POST-PROCESSING A: REVERT EVERYTHING DONE IN THE PRE-PROCESSING STEP
postpa = transforms.Compose([
    transforms.Lambda(lambda x: x.mul_(1. / 255)),
    transforms.Normalize(mean=[-0.40760392, -0.45795686, -0.48501961],
                         std=[1, 1, 1]),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),
])

# POST-PROCESSING B
postpb = transforms.Compose([transforms.ToPILImage()])

# POST-PROCESSING FUNCTION INCORPORATES A AND B, AND CLIPS PIXEL VALUES WHICH ARE OUT OF RANGE
def postp(tensor):
    t = postpa(tensor.clone())  # clone first: postpa mutates in place, which would corrupt opt_img on CPU
    t[t > 1] = 1  # everything above 1 receives value 1
    t[t < 0] = 0  # analogous for everything lower than 0
    img = postpb(t)
    return img

### PREPARING NETWORK ARCHITECTURE
vgg = VGG()
vgg.load_state_dict(torch.load(model_path))
for param in vgg.parameters():
    param.requires_grad = False
if torch.cuda.is_available():
    vgg.cuda()

### LOADING AND PREPARING IMAGES
if len(sys.argv) < 3:
    sys.exit("usage: python neuralart.py <style_image> <content_image>")
img_paths = [image_path, image_path]
# IMAGE LOADING ORDER: [STYLE, CONTENT]
img_names = [sys.argv[1], sys.argv[2]]
imgs = [Image.open(img_paths[i] + name) for i, name in enumerate(img_names)]
imgs_torch = [prep(img) for img in imgs]

# HANDLE CUDA
if torch.cuda.is_available():
    imgs_torch = [Variable(img.unsqueeze(0)).cuda() for img in imgs_torch]
else:
    imgs_torch = [Variable(img.unsqueeze(0)) for img in imgs_torch]
style_img, content_img = imgs_torch

# SET UP IMAGE TO BE OPTIMIZED
# CAN BE INITIALIZED RANDOMLY
# OR AS A CLONE OF CONTENT IMAGE
opt_img = Variable(content_img.clone(), requires_grad=True)
print("Style image:", sys.argv[1], style_img.size())
print("Content image:", sys.argv[2], content_img.size())
print("Target size:", opt_img.size(), end="\n\n")

### SETUP FOR TRAINING
# LAYERS FOR STYLE AND CONTENT LOSS
style_layers = ['r11', 'r21', 'r31', 'r41', 'r51']
content_layers = ['r42']
loss_layers = style_layers + content_layers

# CREATING LOSS FUNCTIONS
loss_fns = [GramMSELoss()] * len(style_layers) + [nn.MSELoss()] * len(content_layers)
if torch.cuda.is_available():
    loss_fns = [loss_fn.cuda() for loss_fn in loss_fns]

# SETUP WEIGHTS FOR LOSS LAYERS (n = CHANNEL COUNT OF EACH STYLE LAYER)
style_weights = [1e3 / n**2 for n in [64, 128, 256, 512, 512]]
content_weights = [1e0]
weights = style_weights + content_weights
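### For intuition, the style weights above evaluate to roughly
### [0.244, 0.061, 0.015, 0.004, 0.004]: deeper layers have more channels
### and hence larger Gram matrices, so 1e3/n**2 rescales their contribution.
# assert [round(w, 3) for w in style_weights] == [0.244, 0.061, 0.015, 0.004, 0.004]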
# CREATE OPTIMIZATION TARGETS
style_targets = [GramMatrix()(A).detach() for A in vgg(style_img, style_layers)]
content_targets = [A.detach() for A in vgg(content_img, content_layers)]
targets = style_targets + content_targets

### TRAINING LOOP
vis_factor = 10  # every 10 iterations a frame snapshot is saved; this coefficient scales the schedule
max_iter = 600 * vis_factor
show_iter = 1 * vis_factor
optimizer = optim.LBFGS([opt_img])
n_iter = [0]
image_array = []

with tqdm(total=max_iter, miniters=0, smoothing=0) as pbar:
    # LBFGS may evaluate the closure several times per step(), so n_iter
    # advances in bursts; the -9 is a fudge factor against overshooting
    while n_iter[0] <= max_iter - 9:
        def closure():
            optimizer.zero_grad()
            # FORWARD
            out = vgg(opt_img, loss_layers)
            # LOSS
            layer_losses = [weights[a] * loss_fns[a](A, targets[a]) for a, A in enumerate(out)]
            loss = sum(layer_losses)
            # BACKWARDS
            loss.backward()
            # TRACK PROGRESS
            if n_iter[0] % show_iter == 0:
                out_img = postp(opt_img.data[0].cpu().squeeze())
                image_array.append(np.array(out_img))
                pbar.update(show_iter)
            n_iter[0] += 1
            return loss
        optimizer.step(closure)

# SAVE ARRAY TO FILE
image_array = np.array(image_array)
np.save('images.npy', image_array)
print("")
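### Viewing the result (sketch): images.npy now holds one HxWx3 frame per
### show_iter iterations, the last being the final stylized image. The
### matplotlib calls below are illustrative only -- uncomment to display it.
# frames = np.load('images.npy')
# plt.imshow(frames[-1])
# plt.axis('off')
# plt.show()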