Utility of Convolutional Neural Network over Multilayer Perceptron
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# ------------------------------ Hyper-parameters ------------------------------
input_size = 28 * 28  # one 28x28 image flattened into 784 features
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001

# -------------------------------- MNIST dataset --------------------------------
# Training split; download=True fetches the files into ./data when missing.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Test split; no download flag is passed, so it reads what is already in ./data.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Data loaders for the MLP experiment: training batches are shuffled, test
# batches are served in dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
# ---------- Preview the first six digits of one test batch ----------
examples = iter(test_loader)
# Use the built-in next(): DataLoader iterators implement __next__ only, and
# the legacy `.next()` method raises AttributeError on current PyTorch.
example_data, example_targets = next(examples)
for i in range(6):
    plt.subplot(2, 3, i + 1)
    # Index [i][0] selects sample i and its first (only) channel so that
    # imshow receives a 2-D array.
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Multilayer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each (already flattened) sample.
        hidden_size: width of the hidden layer.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for `x`; no softmax is applied here."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)
# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop for the MLP
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Flatten every image to a 784-vector: [N, 1, 28, 28] -> [N, 784].
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, back-propagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100th step.
        if step % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step[{step}/{n_total_steps}], Loss: {loss.item():.4f}')
# Evaluate the trained MLP on the test loader.
n_correct = 0
n_samples = 0
# torch.no_grad() turns the autograd engine off: inference needs no gradients,
# which reduces memory usage and speeds up the computation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

# Print the total accuracy as a percentage.
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')
# mnist using CNN
import torch.nn as nn
class CNN(nn.Module):
    """Two Conv2d/ReLU/MaxPool2d stages followed by a 10-way linear layer."""

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 16 channels, 5x5 kernel, stride 1, padding 2,
        # followed by 2x2 max-pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Stage 2: 16 -> 32 channels with the same kernel, stride and padding.
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return ``(scores, flat_features)`` for the batch `x`.

        ``flat_features`` is the conv2 output flattened to one row per sample;
        it is returned alongside the scores for visualization.
        """
        features = self.conv2(self.conv1(x))
        flat = features.view(features.size(0), -1)
        return self.out(flat), flat
# Build the CNN and show its layer structure.
cnn = CNN()
print(cnn)

# Objects that train() picks up from module scope.
loss_func = nn.CrossEntropyLoss()
from torch import optim
# Rebinds the module-level name `optimizer`, which from here on refers to an
# Adam optimizer over the CNN's parameters.
optimizer = optim.Adam(params=cnn.parameters(), lr=0.01)
# Bare expression left over from the notebook (echoes the optimizer there);
# it has no effect when run as a script.
optimizer

num_epochs = 5
def train(num_epochs, cnn, loaders):
    """Train `cnn` for `num_epochs` passes over ``loaders['train']``.

    Uses the module-level ``loss_func`` and ``optimizer``; the optimizer must
    therefore have been built over the parameters of the `cnn` passed in.
    Prints the loss of the current batch every 100 steps.

    Args:
        num_epochs: number of full passes over the training loader.
        cnn: model whose call returns a sequence with the scores first.
        loaders: mapping that holds the training loader under ``'train'``.
    """
    batches = loaders['train']
    total_step = len(batches)
    for epoch in range(1, num_epochs + 1):
        for step, (images, labels) in enumerate(batches, start=1):
            # The model returns several values; element 0 holds the scores.
            output = cnn(images)[0]
            loss = loss_func(output, labels)

            # Clear old gradients, back-propagate, then apply the update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch, num_epochs, step, total_step, loss.item()))
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# MNIST splits for the CNN experiment, stored under ./data.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

# Both loaders use identical settings (batches of 100, shuffled, one worker).
loaders = {
    split: torch.utils.data.DataLoader(
        data,
        batch_size=100,
        shuffle=True,
        num_workers=1,
    )
    for split, data in (('train', train_data), ('test', test_data))
}

train(num_epochs, cnn, loaders)
def test():
    """Evaluate the module-level `cnn` on ``loaders['test']`` and print the result.

    The printed value is the fraction (0.0-1.0) of correctly classified samples
    over the whole test loader. The correct/total counters are accumulated
    across batches; previously they were never updated and only the accuracy
    of the final batch was reported.
    """
    correct = 0
    total = 0
    # Inference only: no gradients are needed.
    with torch.no_grad():
        for images, labels in loaders['test']:
            # The model returns (scores, features); only the scores are used.
            test_output = cnn(images)[0]
            pred_y = torch.max(test_output, 1)[1]
            correct += (pred_y == labels).sum().item()
            total += labels.size(0)
    # An empty loader yields 0.0 instead of a division by zero.
    accuracy = correct / float(total) if total else 0.0
    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)


test()
# check network parameter count
from torchsummary import summary
import torch
# torchsummary builds its probe input on CUDA by default whenever a GPU is
# visible. Pass each model's actual device so the input and the weights always
# live on the same device (`cnn` was never moved off the CPU).
summary(model, input_size=(1, 28 * 28), device=next(model.parameters()).device.type)
summary(cnn, input_size=(1, 28, 28), device=next(cnn.parameters()).device.type)
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchsummary import summary
import torch
# No zero padding, unit strides
i = 4  # input size
k = 3  # kernel size
s = 1  # stride
p = 0  # padding
o = (i - k) + 1  # expected output size for s = 1, p = 0
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
# zero padding, unit strides
i = 5  # input size
k = 4  # kernel size
s = 1  # stride
p = 2  # padding
o = (i - k) + 2*p + 1  # expected output size for s = 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
$o = (i - k) + 2*p + 1$
Now we want $o = i$
Putting $o = i$ we get
$i = (i - k) + 2*p + 1$
$2p = k - 1$
$p = (k - 1)/2$
Therefore, when $k$ is odd, $p$ is an integer and can be written as
$p = \lfloor k/2 \rfloor$
# Half (same) padding -> output size be the same as the input size (i.e., o = i)
# Having the output size be the same as the input size (i.e., o = i) can be a desirable property
i = 5  # input size
k = 3  # kernel size
s = 1  # stride
p = k // 2  # padding = floor(k / 2)
o = (i - k) + 2*p + 1  # expected output size for s = 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
While convolving a kernel generally decreases the output size with respect to the input size, sometimes the opposite is required. This can be achieved with proper zero padding
# Full padding
i = 5  # input size
k = 3  # kernel size
s = 1  # stride
p = k - 1  # padding
o = (i - k) + 2*p + 1  # expected output size for s = 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 32 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.conv1 = nn.Conv2d(1, 32, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
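If we want to increase the output size further, can we increase the padding?
Ans. Computationally, yes.
But there is a severe problem: once $p > k - 1$, the outermost kernel positions overlap only zero padding, so the corresponding outputs carry no information about the input.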
# Zero padding, non-unit strides
i = 5  # input size
k = 3  # kernel size
s = 2  # stride
p = 1  # padding
o = (i + 2*p - k) // s + 1  # floor((i + 2p - k) / s) + 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
# see when i = 6 instead of 5 we are also getting same output size
i = 6  # input size
k = 3  # kernel size
s = 2  # stride
p = 1  # padding
o = (i + 2*p - k) // s + 1  # floor((i + 2p - k) / s) + 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel convolution built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p))

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")
This relationship holds for any type of pooling
# Max-pooling obeys the same output-size relationship as convolution.
i = 28  # input size
k = 3  # kernel (pooling window) size
s = 2  # stride
p = 0  # padding
o = (i - k + 2*p) // s + 1  # floor((i - k + 2p) / s) + 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single max-pooling layer built from the globals k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.MaxPool2d(kernel_size, stride=None, padding=0)
        self.pool1 = nn.MaxPool2d(k, (s, s), padding=(p, p))

    def forward(self, x):
        """Pool `x`, then apply log-softmax over dim 1."""
        pooled = self.pool1(x)
        return F.log_softmax(pooled, dim=1)


model = NeuralNet()
print('Model Summary')
# Push an all-zero (1, 1, i, i) batch through the model and print the
# resulting shape.
probe = torch.from_numpy(np.zeros([1, 1, i, i]))
print(model(probe).shape)
Utility of pooling layers
What is transposed convolution
Objective of transposed convolution
Why we need it?
In terms of operation how is it different from convolution?
For any input size $i$, kernel size $k$, stride $s$ and padding $p$
The formula for convolution is,
\begin{equation}
o = \lfloor \frac{i + 2p - k}{s} \rfloor + 1
\end{equation}
Dropping the floor (i.e. assuming $s$ divides $i + 2p - k$) and solving for $i$ we get, \begin{align} o &= \frac{i + 2p - k}{s} + 1 \nonumber\\ o - 1 &= \frac{i + 2p - k}{s} \nonumber\\ s (o - 1) &= i + 2p - k \nonumber\\ i &= s (o - 1) + k - 2p \nonumber \end{align}
Now for the given $i$ and $o$ we consider the equivalent transposed convolution that has input $o$ and output size $i$.
However instead of $o$ and $i$ we use the notation $o', i'$ to denote the case of transposed convolution i.e., $i' = o$ and $o' = i$.
Therefore the formula of output size for transposed convolution becomes,
\begin{align}
o' &= s (i' - 1) + k - 2p \text{ (using $o'$ in place of i and $i'$ in place of o)}
\end{align}
Let us take the example of the following convolution
Now, let us see the corresponding transposed convolution example
# No zero padding, unit strides, transposed
# i is the output of corresponding convolution and k, s, p are the parameters of that convolution
i = 2  # input size
k = 3  # kernel size
s = 1  # stride
p = 0  # padding
o = (i-1) * s + k - 2*p  # o' = s (i' - 1) + k - 2p
print("output size",o)


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel transposed convolution built from k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.convt = nn.ConvTranspose2d(1, 1, k, stride=s, padding=p)

    def forward(self, x):
        """Upsample `x`, print the resulting shape, then log-softmax over dim 1."""
        upsampled = self.convt(x)
        print(upsampled.shape)
        return F.log_softmax(upsampled, dim=1)


model = NeuralNet()
print('Model Summary')
# Run one random (1, 1, i, i) batch through the model and print the output shape.
print("Out", model(torch.rand(1, 1, i, i)).shape)
Transposed convolution is also called fractionally strided convolution:
# i is the output of corresponding convolution and k, s, p are the parameters of that convolution
i = 2  # input size
k = 3  # kernel size
s = 2  # stride
p = 0  # padding
# Expected output size, o' = s (i' - 1) + k - 2p. Computed and printed here
# like in the other cells, so `o` is not a stale value from an earlier cell.
o = (i - 1) * s + k - 2*p
print("output size",o)


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel transposed convolution built from k, s and p."""

    def __init__(self):
        super().__init__()
        # torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
        self.convt = nn.ConvTranspose2d(1, 1, k, stride=s, padding=p)

    def forward(self, x):
        """Upsample `x`, print the resulting shape, then log-softmax over dim 1."""
        x = self.convt(x)
        print(x.shape)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# Run one random (1, 1, i, i) batch through the model and print the output shape.
print("Out", model(torch.rand(1, 1, i, i)).shape)
Disadvantage?
A kernel of size k dilated by a factor d has an effective size of k + (k − 1)(d − 1).
The output size of a dilated convolution is therefore,
$o = \lfloor \frac{i + 2p - k - (k - 1)(d - 1)}{s} \rfloor + 1$
import numpy as np

# Dilated convolution
i = 7  # input size
k = 3  # kernel size
s = 1  # stride
p = 0  # padding
d = 2  # dilation
# floor((i + 2p - k - (k - 1)(d - 1)) / s) + 1
o = (i - k + 2*p - (k - 1)*(d - 1)) // s + 1
print("output size -> ",o)
print('-----------------------------------------')


class NeuralNet(nn.Module):
    """Single 1 -> 1 channel dilated convolution built from k, s, p and d."""

    def __init__(self):
        super().__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        self.conv1 = nn.Conv2d(1, 1, k, (s, s), padding=(p, p), dilation=d)

    def forward(self, x):
        """Convolve `x`, then apply log-softmax over dim 1."""
        x = self.conv1(x)
        return F.log_softmax(x, dim=1)


model = NeuralNet()
print('Model Summary')
# The model lives on the CPU; say so explicitly because torchsummary defaults
# to CUDA and would feed it a GPU tensor on hosts that have one.
summary(model, input_size=(1, i, i), device="cpu")