author    Alphara <alphara@deltarion>    2023-02-12 01:15:54 +0200
committer Alphara <alphara@deltarion>    2023-02-12 01:15:54 +0200
commit    c43fdcf1ee45ecd112411f77b2519cbee9941b04 (patch)
tree      f628b7eefd1476dbcac1340f3eaec54cd526617c
parent    7c84258c739c7c998526ad86fbea51bb6e8c6f7a (diff)

initial migration of the project

 .gitignore                              |   4
 clear_dir.sh                            |   7
 neuralart.py                            | 281
 preview/Starry_Night_in_Jitter_Doll.mp4 | bin 0 -> 2415391 bytes
 preview/cute_in_Jitter_Doll.mp4         | bin 0 -> 1453537 bytes
 renderer.py                             |  32
 stylize.sh                              |  18
 7 files changed, 342 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
index b6e4761..4ec1e79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+### custom ignores here
+Images/*
+images.npy
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/clear_dir.sh b/clear_dir.sh
new file mode 100755
index 0000000..6b7e3db
--- /dev/null
+++ b/clear_dir.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+### clearing the directory
+### of temporary files
+
+rm -rf __pycache__
+rm -f Output/* images.npy
diff --git a/neuralart.py b/neuralart.py
new file mode 100644
index 0000000..4b29d21
--- /dev/null
+++ b/neuralart.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+
+### imports (will come with a requirements.txt)
+
+from torch.autograd import Variable
+from torchvision import transforms
+from torch import optim
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+import torch
+
+import numpy as np
+
+import sys
+
+from PIL import Image
+from tqdm import tqdm
+
+### path definitions
+
+model_path = 'weights/vgg_conv_weights.pth'
+image_path = '' # root (neural-art) directory
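+
+# NOTE: image file names passed on the command line are resolved relative
+# to image_path, e.g. (illustrative names, not shipped with the repo):
+#   python neuralart.py starry_night.jpg photo.jpg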
+
+### VGG was trained on ImageNet;
+### although old at this point,
+### it still achieves good results
+
+class VGG(nn.Module):
+ def __init__(self, pool='max'):
+ super(VGG, self).__init__()
+
+ self.conv1_1 = nn.Conv2d(3, 64, kernel_size = 3, padding = 1)
+ self.conv1_2 = nn.Conv2d(64, 64, kernel_size = 3, padding = 1)
+
+ self.conv2_1 = nn.Conv2d(64, 128, kernel_size = 3, padding = 1)
+ self.conv2_2 = nn.Conv2d(128, 128, kernel_size = 3, padding = 1)
+
+ self.conv3_1 = nn.Conv2d(128, 256, kernel_size = 3, padding = 1)
+ self.conv3_2 = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
+ self.conv3_3 = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
+ self.conv3_4 = nn.Conv2d(256, 256, kernel_size = 3, padding = 1)
+
+ self.conv4_1 = nn.Conv2d(256, 512, kernel_size = 3, padding = 1)
+ self.conv4_2 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+ self.conv4_3 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+ self.conv4_4 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+
+ self.conv5_1 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+ self.conv5_2 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+ self.conv5_3 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+ self.conv5_4 = nn.Conv2d(512, 512, kernel_size = 3, padding = 1)
+
+ # POOLING OPTIONS
+ if pool == 'max':
+ self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
+ self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
+ self.pool3 = nn.MaxPool2d(kernel_size = 2, stride = 2)
+ self.pool4 = nn.MaxPool2d(kernel_size = 2, stride = 2)
+ self.pool5 = nn.MaxPool2d(kernel_size = 2, stride = 2)
+ elif pool == 'avg':
+ self.pool1 = nn.AvgPool2d(kernel_size = 2, stride = 2)
+ self.pool2 = nn.AvgPool2d(kernel_size = 2, stride = 2)
+ self.pool3 = nn.AvgPool2d(kernel_size = 2, stride = 2)
+ self.pool4 = nn.AvgPool2d(kernel_size = 2, stride = 2)
+ self.pool5 = nn.AvgPool2d(kernel_size = 2, stride = 2)
+
+ def forward(self, x, out_keys):
+ out = {}
+
+ out['r11'] = F.relu(self.conv1_1(x))
+ out['r12'] = F.relu(self.conv1_2(out['r11']))
+ out['p1'] = self.pool1(out['r12'])
+
+ out['r21'] = F.relu(self.conv2_1(out['p1']))
+ out['r22'] = F.relu(self.conv2_2(out['r21']))
+ out['p2'] = self.pool2(out['r22'])
+
+ out['r31'] = F.relu(self.conv3_1(out['p2']))
+ out['r32'] = F.relu(self.conv3_2(out['r31']))
+ out['r33'] = F.relu(self.conv3_3(out['r32']))
+ out['r34'] = F.relu(self.conv3_4(out['r33']))
+ out['p3'] = self.pool3(out['r34'])
+
+ out['r41'] = F.relu(self.conv4_1(out['p3']))
+ out['r42'] = F.relu(self.conv4_2(out['r41']))
+ out['r43'] = F.relu(self.conv4_3(out['r42']))
+ out['r44'] = F.relu(self.conv4_4(out['r43']))
+ out['p4'] = self.pool4(out['r44'])
+
+ out['r51'] = F.relu(self.conv5_1(out['p4']))
+ out['r52'] = F.relu(self.conv5_2(out['r51']))
+ out['r53'] = F.relu(self.conv5_3(out['r52']))
+ out['r54'] = F.relu(self.conv5_4(out['r53']))
+ out['p5'] = self.pool5(out['r54'])
+
+
+        # RETURN DESIRED ACTIVATIONS
+        return [out[key] for key in out_keys]
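+
+# e.g. (illustrative) vgg(img, ['r11', 'r42']) returns the conv1_1 and
+# conv4_2 activations; this is how the style and content layers are
+# selected further below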
+
+
+
+### COMPUTING GRAM MATRIX AND GRAM MATRIX LOSS
+
+# GRAM MATRICES ARE USED TO MEASURE STYLE LOSS
+class GramMatrix(nn.Module):
+
+ def forward(self, input):
+        b, c, h, w = input.size() # pytorch layout is (batch, channels, height, width)
+        feats = input.view(b, c, h * w) # renamed from F to avoid shadowing torch.nn.functional
+
+        # COMPUTES GRAM MATRIX BY MULTIPLYING INPUT BY TRANSPOSE OF ITSELF
+        G = torch.bmm(feats, feats.transpose(1, 2))
+        G.div_(h * w)
+
+ return G
+
+class GramMSELoss(nn.Module):
+
+ def forward(self, input, target):
+ out = nn.MSELoss()(GramMatrix()(input), target)
+
+ return out
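+
+# quick shape sanity check (illustrative only, not part of the pipeline):
+# a (1, 64, 720, 720) activation is flattened to (1, 64, 518400), and bmm
+# with its transpose yields a (1, 64, 64) matrix of channel correlations:
+#
+#   feats = torch.randn(1, 64, 720, 720)
+#   G = GramMatrix()(feats) # G.size() == torch.Size([1, 64, 64])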
+
+
+
+### IMAGE PROCESSING
+
+# "based" on how much vram you have,
+# you can either set this to 1080
+# as in 1080p or do what i did:
+# set the resolution to 720p, and cry.
+
+img_size = 720
+
+# PRE-PROCESSING
+prep = transforms.Compose([transforms.Resize(img_size),
+ transforms.ToTensor(),
+ transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]), # CONVERT TO BGR FOR VGG
+ transforms.Normalize(mean = [0.40760392, 0.45795686, 0.48501961], std = [1, 1, 1]), # SUBTRACT IMAGENET MEAN
+ transforms.Lambda(lambda x: x.mul_(255)) # VGG WAS TRAINED WITH PIXEL VALUES 0-255
+ ])
+
+# POST-PROCESSING A
+postpa = transforms.Compose([transforms.Lambda(lambda x: x.mul(1./255)), # NON-IN-PLACE MUL, SO THE TENSOR BEING OPTIMIZED IS NOT MODIFIED
+ transforms.Normalize(mean = [-0.40760392, -0.45795686, -0.48501961], std = [1, 1, 1]),
+ transforms.Lambda(lambda x: x[torch.LongTensor([2,1,0])])
+ ])
+
+# POST-PROCESSING B
+postpb = transforms.Compose([transforms.ToPILImage()])
+
+# POST PROCESSING FUNCTION INCORPORATES A AND B, AND CLIPS PIXEL VALUES WHICH ARE OUT OF RANGE
+def postp(tensor):
+ t = postpa(tensor)
+ t[t>1] = 1 # everything above 1 receives value 1
+ t[t<0] = 0 # analogous for everything lower than 0
+ img = postpb(t)
+ return img
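+
+# e.g. (illustrative) postp(prep(img)) round-trips a PIL image through the
+# BGR/mean-shift pipeline and back, up to resizing and the clipping above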
+
+
+
+### PREPARING NETWORK ARCHITECTURE
+
+vgg = VGG()
+
+vgg.load_state_dict(torch.load(model_path, map_location='cpu')) # map_location keeps this working on cpu-only machines
+
+for param in vgg.parameters():
+ param.requires_grad = False
+
+if torch.cuda.is_available():
+ vgg.cuda()
+
+
+
+### LOADING AND PREPARING IMAGES
+
+img_paths = [image_path, image_path]
+
+# IMAGE LOADING ORDER: [STYLE, CONTENT]
+img_names = [sys.argv[1], sys.argv[2]]
+imgs = [Image.open(img_paths[i] + name) for i, name in enumerate(img_names)]
+imgs_torch = [prep(img) for img in imgs]
+
+# HANDLE CUDA
+if torch.cuda.is_available():
+ imgs_torch = [Variable(img.unsqueeze(0)).cuda() for img in imgs_torch]
+else:
+ imgs_torch = [Variable(img.unsqueeze(0)) for img in imgs_torch]
+
+style_img, content_img = imgs_torch
+
+# SET UP IMAGE TO BE OPTIMIZED
+# CAN BE INITIALIZED RANDOMLY
+# OR AS A CLONE OF CONTENT IMAGE
+opt_img = Variable(content_img.clone(), requires_grad = True)
+print("Content size:", content_img.size())
+print("Target size:", opt_img.size(), end="\n\n")
+
+
+
+### SETUP FOR TRAINING
+
+# LAYERS FOR STYLE AND CONTENT LOSS
+style_layers = ['r11', 'r21', 'r31', 'r41', 'r51'] # first conv of each block, matching the channel counts in style_weights below
+content_layers = ['r42']
+loss_layers = style_layers + content_layers
+
+# CREATING LOSS FUNCTION
+loss_fns = [GramMSELoss()] * len(style_layers) + [nn.MSELoss()] * len(content_layers)
+if torch.cuda.is_available():
+ loss_fns = [loss_fn.cuda() for loss_fn in loss_fns]
+
+# SETUP WEIGHTS FOR LOSS LAYERS
+style_weights = [1e3/n**2 for n in [64, 128, 256, 512, 512]]
+content_weights = [1e0]
+weights = style_weights + content_weights
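+
+# e.g. r11 has 64 channels, so its weight is 1e3 / 64**2 ≈ 0.244, while
+# r51 (512 channels) gets 1e3 / 512**2 ≈ 0.0038; the 1/n**2 scaling keeps
+# each layer's gram loss on a comparable scale regardless of channel count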
+
+# CREATE OPTIMIZATION TARGETS
+style_targets = [GramMatrix()(A).detach() for A in vgg(style_img, style_layers)]
+content_targets = [A.detach() for A in vgg(content_img, content_layers)]
+targets = style_targets + content_targets
+
+
+
+### TRAINING LOOP
+
+vis_factor = 3
+max_iter = 600 * vis_factor
+show_iter = 1 * vis_factor
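+
+# with vis_factor = 3 this snapshots every 3rd closure call out of 1800,
+# i.e. about 600 frames, which is ~10 seconds of video at 60 fps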
+
+optimizer = optim.LBFGS([opt_img])
+n_iter = [0]
+
+image_array = []
+
+with tqdm(total=max_iter) as pbar:
+    while n_iter[0] <= max_iter - 9: # LBFGS may evaluate the closure several times per step, so n_iter can overshoot; the slack keeps the count near max_iter
+
+ def closure():
+ optimizer.zero_grad()
+
+ # FORWARD
+ out = vgg(opt_img, loss_layers)
+
+ # LOSS
+ layer_losses = [weights[a] * loss_fns[a](A, targets[a]) for a,A in enumerate(out)]
+ loss = sum(layer_losses)
+
+ # BACKWARDS
+ loss.backward()
+
+ # TRACK PROGRESS
+ if n_iter[0] % show_iter == 0:
+
+ out_img = postp(opt_img.data[0].cpu().squeeze())
+ image_array.append(np.array(out_img))
+
+ pbar.update(show_iter)
+
+ n_iter[0] += 1
+
+ return loss
+
+ optimizer.step(closure)
+
+# SAVE ARRAY TO FILE
+
+image_array = np.array(image_array)
+np.save('images.npy', image_array)
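+
+# images.npy is picked up by renderer.py in the next step of stylize.sh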
+
+print("")
diff --git a/preview/Starry_Night_in_Jitter_Doll.mp4 b/preview/Starry_Night_in_Jitter_Doll.mp4
new file mode 100644
index 0000000..b6a264d
--- /dev/null
+++ b/preview/Starry_Night_in_Jitter_Doll.mp4
Binary files differ
diff --git a/preview/cute_in_Jitter_Doll.mp4 b/preview/cute_in_Jitter_Doll.mp4
new file mode 100644
index 0000000..15e0a23
--- /dev/null
+++ b/preview/cute_in_Jitter_Doll.mp4
Binary files differ
diff --git a/renderer.py b/renderer.py
new file mode 100644
index 0000000..98b32f5
--- /dev/null
+++ b/renderer.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+
+### this step is very slow; it will be split across
+### multiple jobs later in the development of this tool
+
+### data loading
+
+import numpy as np
+
+image_array = np.load("images.npy", allow_pickle=True)
+
+### progress bar
+
+from tqdm import tqdm
+
+pbar = tqdm(total = len(image_array))
+
+### rendering of images
+
+import matplotlib.pyplot as plt
+
+def render(index):
+ name = 'Output/neural_art_{:04d}.png'.format(index + 1)
+
+ plt.axis('off')
+ plt.imshow(image_array[index])
+ plt.savefig(name, dpi=258, bbox_inches='tight', pad_inches=0) # dpi 258 -> 720p ; dpi 387 -> 1080p output image resolution
+ plt.close('all')
+
+for index in range(len(image_array)):
+    render(index)
+    pbar.update(1)
+
+pbar.close()
diff --git a/stylize.sh b/stylize.sh
new file mode 100755
index 0000000..0095629
--- /dev/null
+++ b/stylize.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+### You must be in the 'neural-art' directory when you run this
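+### example invocation (file names are illustrative):
+###   ./stylize.sh style.jpg content.jpg Output/result.mp4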
+
+if [ $# -ne 3 ]
+    then
+        echo "Usage: ./stylize.sh <style_image> <content_image> <output_video>"
+        exit 1
+fi
+
+# stylize data [pair (style, content)]
+python neuralart.py "$1" "$2"
+
+# render images (actual frames) from images.npy
+python renderer.py
+
+# turn everything into a video
+ffmpeg -framerate 60 -pattern_type glob -i 'Output/neural_art_*.png' -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" "$3"
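+
+# the pad filter rounds width and height up to even values, which the
+# default H.264/yuv420p output requires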