The core approach to improving the performance of a deep neural network is increasing its size:
1. Increasing the depth of the model: the number of layers in the network.
2. Increasing the width of the model: the number of units at each layer.
Both of these bring problems:
1. A larger number of parameters, which makes the network more prone to overfitting and therefore calls for better training samples (in both quantity and quality);
2. An increased use of computational resources.
Remedy: the authors suggest moving from fully connected to sparsely connected architectures, building on the following result quoted in the paper:
Their main result states that if the probability distribution of the data-set is representable by a large, very sparse deep neural network, then the optimal network topology can be constructed layer by layer by analyzing the correlation statistics of the activations of the last layer and clustering neurons with highly correlated outputs.
Model architecture
As with the size increase discussed above, the authors modify the model not in its "depth" (stacking more convolution or fully connected layers) but in its "width":
The left side shows the naive module; the right side shows the improved module built on it. In module (a), the $3\times 3$ and $5\times 5$ convolutions clearly drive up the computational cost (problem 2 above). The authors' idea is to place a $1\times 1$ convolution in front of these convolutions to cut the computation.
Reducing the computation
Take a $28\times 28\times 192$ input feeding a $5\times 5$ convolution with 32 output channels. The number of multiplications is $(28\times 28\times 32)\times(5\times 5\times 192)=120{,}422{,}400$.
Inserting a $1\times 1$ convolution that first reduces the input to 16 channels, the count becomes $(28\times 28\times 16\times 1\times 1\times 192)+(28\times 28\times 32\times 5\times 5\times 16)=12{,}443{,}648$, roughly a tenth of the original.
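The arithmetic can be checked with a few lines of Python (a minimal sketch; the $28\times 28\times 192$ input and the 16-channel bottleneck are the figures from the example above):
# Multiply-accumulate counts for the 5x5 convolution example above
H, W, C_in, C_out, k = 28, 28, 192, 32, 5
direct = (H * W * C_out) * (k * k * C_in)  # 120,422,400
C_mid = 16  # channels after the 1x1 reduction
reduced = (H * W * C_mid) * (1 * 1 * C_in) + (H * W * C_out) * (k * k * C_mid)
print(direct, reduced, round(direct / reduced, 1))  # 120422400 12443648 9.7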
Following this idea, the authors arrive at the overall architecture shown below:
Overall, the main innovations of GoogLeNet are:
1. The effective way to raise accuracy in a convolutional network is to grow the overall network size, which brings the difficulties above; $1\times 1$ convolutions are used to cut the number of parameters;
2. Rather than only making the network deeper, the authors widen it by running several convolutions in parallel inside each Inception module.
A PyTorch implementation of the full network follows.
import torch
import torch.nn as nn
from typing import Any
device = ('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cuda
class InceptionModule(nn.Module):
"""
    in_channel: number of channels of the module input
    out1: output channels of the 1x1 convolution branch
    pre_in3: output channels of the 1x1 reduction feeding the 3x3 convolution
    com_out3: output channels of the 3x3 convolution
    pre_in5: output channels of the 1x1 reduction feeding the 5x5 convolution
    com_out5: output channels of the 5x5 convolution
    pool_out: output channels of the 1x1 projection after pooling
"""
def __init__(self, in_channel, out1, pre_in3, com_out3, pre_in5, com_out5, pool_out):
super(InceptionModule, self).__init__()
        self.con1 = nn.Conv2d(in_channel, out1, kernel_size=1)  # 1x1 convolution branch
        self.con3 = nn.Sequential(
            nn.Conv2d(in_channel, pre_in3, kernel_size=1),
            nn.Conv2d(pre_in3, com_out3, kernel_size=3, padding=1),
        )  # 1x1 reduction + 3x3 convolution
        self.con5 = nn.Sequential(
            nn.Conv2d(in_channel, pre_in5, kernel_size=1),
            nn.Conv2d(pre_in5, com_out5, kernel_size=5, padding=2)
        )  # 1x1 reduction + 5x5 convolution
        self.pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            nn.Conv2d(in_channel, pool_out, kernel_size=1, stride=1)
        )  # 3x3 max pooling + 1x1 projection
def forward(self, x):
x1 = self.con1(x)
x3 = self.con3(x)
x5 = self.con5(x)
max_x = self.pool(x)
        # Concatenate the four branch outputs along the channel dimension
        return torch.cat([x1, x3, x5, max_x], dim=1)
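A quick shape check of the module (a sketch using the 3a configuration that appears later; the output channels are the sum of the four branches, 64+128+32+32=256):
m = InceptionModule(192, 64, 96, 128, 16, 32, 32)
print(m(torch.randn(1, 192, 28, 28)).shape)  # torch.Size([1, 256, 28, 28])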
class ConvBlock(nn.Module):
"""
    Conv2d + BatchNorm2d + ReLU block, to simplify the later convolution definitions
"""
def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
super(ConvBlock, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batch_norm = nn.BatchNorm2d(out_channels, eps=0.001)
self.relu = nn.ReLU(True)
def forward(self, x):
out = self.conv(x)
out = self.batch_norm(out)
out = self.relu(out)
return out
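A one-line sanity check of the block (sketch, using the stem configuration of the network below):
print(ConvBlock(3, 64, kernel_size=7, stride=2, padding=3)(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 64, 112, 112])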
class InceptionAux(nn.Module):
"""
    GoogLeNet attaches auxiliary classifiers on side branches; this class defines them
"""
def __init__(
self,
input_size: int,
        num_classes: int = 1000,
        dropout: float = 0.7) -> None:
super(InceptionAux, self).__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size=5, stride=3, ceil_mode=True)  # 14x14 -> 4x4
        self.conv = ConvBlock(input_size, 128, kernel_size=1, stride=1)  # 4x4x128
        self.fc1 = nn.Linear(2048, 1024)  # 4x4x128 = 2048 flattened features
self.fc2 = nn.Linear(1024, num_classes)
self.relu = nn.ReLU(True)
        self.dropout = nn.Dropout(dropout, inplace=True)
def forward(self, x):
x = self.avg_pool(x)
x = self.conv(x)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = self.relu(x)
x = self.dropout(x)
x = self.fc2(x)
return x
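Why fc1 takes 2048 inputs: the branch receives a 14x14 map, the 5x5/stride-3 average pool shrinks it to 4x4, the 1x1 conv maps it to 128 channels, and 4x4x128 = 2048. A quick check (sketch, using the 512-channel input of the first branch):
aux = InceptionAux(512)
print(aux(torch.randn(1, 512, 14, 14)).shape)  # torch.Size([1, 1000])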
class GoogLeNet(nn.Module):
"""
    input_size: number of channels of the input image
    num_classes: number of output classes
"""
def __init__(
self,
        input_size: int = 3,
        num_classes: int = 1000,
        dropout: float = 0.4,
        dropout_aux: float = 0.7,
        inception_aux: bool = True) -> None:
super(GoogLeNet, self).__init__()
        self.inception_aux = inception_aux  # whether to attach the auxiliary classifiers
        self.conv1 = ConvBlock(input_size, 64, kernel_size=7, stride=2, padding=3)  # 112x112x64
        self.max_pool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 56x56x64
        self.conv2 = ConvBlock(64, 64, kernel_size=1, stride=1)  # 56x56x64
        self.conv3 = ConvBlock(64, 192, kernel_size=3, stride=1, padding=1)  # 56x56x192
        self.max_pool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 28x28x192
self.incept3a = InceptionModule(192, 64, 96, 128, 16, 32, 32) # 28x28x256
self.incept3b = InceptionModule(256, 128, 128, 192, 32, 96, 64) # 28x28x480
        self.max_pool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 14x14x480
self.incept4a = InceptionModule(480, 192, 96, 208, 16, 48, 64) # 14x14x512
        # auxiliary classifier 1 is attached after incept4a
self.incept4b = InceptionModule(512, 160, 112, 224, 24, 64, 64) # 14x14x512
self.incept4c = InceptionModule(512, 128, 128, 256, 24, 64, 64) # 14x14x512
self.incept4d = InceptionModule(512, 112, 144, 288, 32, 64, 64) # 14x14x528
        # auxiliary classifier 2 is attached after incept4d
self.incept4e = InceptionModule(528, 256, 160, 320, 32, 128, 128) # 14x14x832
self.max_pool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # 7x7x832
self.incept5a = InceptionModule(832, 256, 160, 320, 32, 128, 128) # 7x7x832
self.incept5b = InceptionModule(832, 384, 192, 384, 48, 128, 128) # 7x7x1024
self.avg_pool = nn.AvgPool2d(kernel_size=7, stride=1) # 1x1x1024
self.linear = nn.Linear(1024, num_classes) # 1x1x1000
        self.dropout = nn.Dropout(dropout)  # 40% dropout before the linear classifier
        # Auxiliary classifiers on the side branches (70% dropout, per the paper)
        if inception_aux:
            self.inception_aux1 = InceptionAux(512, num_classes, dropout_aux)
            self.inception_aux2 = InceptionAux(528, num_classes, dropout_aux)
else:
self.inception_aux1 = None
self.inception_aux2 = None
    def forward(self, x):
        aux1 = aux2 = None  # stay None when the auxiliary classifiers are disabled or in eval mode
        x = self.conv1(x)
x = self.max_pool1(x)
x = self.max_pool2(self.conv3(self.conv2(x)))
x = self.incept3a(x)
x = self.incept3b(x)
x = self.max_pool3(x)
x = self.incept4a(x)
        if self.inception_aux1 is not None and self.training:
aux1 = self.inception_aux1(x)
x = self.incept4b(x)
x = self.incept4c(x)
x = self.incept4d(x)
        if self.inception_aux2 is not None and self.training:
aux2 = self.inception_aux2(x)
x = self.incept4e(x)
x = self.max_pool4(x)
x = self.incept5a(x)
x = self.incept5b(x)
x = self.avg_pool(x)
x = torch.flatten(x, 1)
x = self.dropout(x)
x = self.linear(x)
        return x, aux2, aux1  # main logits, auxiliary outputs 2 and 1
model = GoogLeNet().to(device)
x = torch.randn(1, 3, 224, 224).to(device)
y, aux2, aux1 = model(x)
print(y.shape)  # torch.Size([1, 1000])
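During training, the paper adds the auxiliary losses to the main loss with a discount weight of 0.3; at inference the auxiliary branches are discarded. A minimal training-step sketch (the labels tensor here is a hypothetical stand-in for real targets):
criterion = nn.CrossEntropyLoss()
labels = torch.randint(0, 1000, (1,)).to(device)  # hypothetical targets, for illustration only
loss = criterion(y, labels)
if aux1 is not None and aux2 is not None:  # both are None in eval mode or with inception_aux=False
    loss = loss + 0.3 * (criterion(aux1, labels) + criterion(aux2, labels))
loss.backward()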