Add at new repo again

This commit is contained in:
2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,388 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : AugmentCE2P.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import functools
import pdb
import torch
import torch.nn as nn
from torch.nn import functional as F
# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
from modules import InPlaceABNSync
import numpy as np
# Batch-norm factory used by the backbone: InPlaceABNSync built with activation='none'
# (presumably this turns off the activation the module applies by default -- confirm in `modules`).
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
# Forwarded as `affine=` to the BatchNorm2d created for downsample shortcuts in ResNet._make_layer.
affine_par = True
# Input-preprocessing metadata, keyed by architecture name and then by pretraining dataset.
# initialize_pretrained_model copies every entry except 'num_classes' onto the model.
pretrained_settings = {
'resnet101': {
'imagenet': {
# NOTE(review): mean/std below look like the usual ImageNet statistics listed in B, G, R order -- confirm.
'input_space': 'BGR',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.406, 0.456, 0.485],
'std': [0.225, 0.224, 0.229],
'num_classes': 1000
}
},
}
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class Bottleneck(nn.Module):
    """
    Residual bottleneck: 1x1 reduce -> (dilated) 3x3 -> 1x1 expand, plus a shortcut.

    The 3x3 convolution carries the stride, and its padding and dilation are both
    ``dilation * multi_grid``. ``fist_dilation`` is accepted but never read.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1):
        super().__init__()
        rate = dilation * multi_grid
        widened = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=rate, dilation=rate, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        # Out-of-place ReLU between the convolutions, in-place ReLU after the residual sum.
        self.relu = nn.ReLU(inplace=False)
        self.relu_inplace = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages, add the (optionally projected) input, apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is the raw input unless a projection module was supplied.
        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        return self.relu_inplace(out + shortcut)
class CostomAdaptiveAvgPool2D(nn.Module):
    """
    Adaptive average pooling assembled from fixed-window ``F.avg_pool2d`` calls.

    ``output_size`` is an ``(H_out, W_out)`` pair. Each output cell averages the
    input window ``[floor(i * in / out), ceil((i + 1) * in / out))`` along each
    spatial axis, and the cells are concatenated back into a grid.
    """

    def __init__(self, output_size):
        super().__init__()
        self.output_size = output_size

    def forward(self, x):
        """Pool the last two dimensions of ``x`` down to ``self.output_size``."""
        in_h, in_w = x.shape[-2:]
        out_h, out_w = self.output_size

        def window(index, in_len, out_len):
            # Start/end of the input span feeding output position `index`.
            start = int(np.floor(index * in_len / out_len))
            end = int(np.ceil((index + 1) * in_len / out_len))
            return start, end

        rows = []
        for row in range(out_h):
            top, bottom = window(row, in_h, out_h)
            cells = []
            for col in range(out_w):
                left, right = window(col, in_w, out_w)
                patch = x[:, :, top:bottom, left:right]
                # A kernel as large as the patch collapses it to a single value.
                cells.append(F.avg_pool2d(patch, [bottom - top, right - left]))
            rows.append(torch.concat(cells, -1))
        return torch.concat(rows, -2)
class PSPModule(nn.Module):
    """
    Pyramid pooling head.

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*

    One pooled branch is built per entry of ``sizes``. Sizes 3 and 6 use
    ``CostomAdaptiveAvgPool2D``; every other size uses ``nn.AdaptiveAvgPool2d``.
    The branch outputs are resized to the input resolution, concatenated with
    the input and fused by a 3x3 convolution.
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super().__init__()
        custom_pool_sizes = (3, 6)
        branches = [
            self._make_stage_custom(features, out_features, size) if size in custom_pool_sizes
            else self._make_stage(features, out_features, size)
            for size in sizes
        ]
        self.stages = nn.ModuleList(branches)
        # The fusion conv sees the input plus one out_features-wide map per branch.
        fused_channels = features + len(sizes) * out_features
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_channels, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        """Branch using the built-in adaptive average pooling to ``size`` x ``size``."""
        pool = nn.AdaptiveAvgPool2d(output_size=(size, size))
        project = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        norm = InPlaceABNSync(out_features)
        return nn.Sequential(pool, project, norm)

    def _make_stage_custom(self, features, out_features, size):
        """Branch using ``CostomAdaptiveAvgPool2D`` to ``size`` x ``size``."""
        pool = CostomAdaptiveAvgPool2D(output_size=(size, size))
        project = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        norm = InPlaceABNSync(out_features)
        return nn.Sequential(pool, project, norm)

    def forward(self, feats):
        """Return the fused pyramid features at the spatial size of ``feats``."""
        target_size = (feats.size(2), feats.size(3))
        pyramid = []
        for stage in self.stages:
            pooled = stage(feats)
            pyramid.append(F.interpolate(input=pooled, size=target_size, mode='bilinear',
                                         align_corners=True))
        pyramid.append(feats)
        return self.bottleneck(torch.cat(pyramid, 1))
class ASPPModule(nn.Module):
    """
    Atrous spatial pyramid pooling with five parallel branches.

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*

    Branches: global-average-pooled 1x1 conv, plain 1x1 conv, and three 3x3
    convs dilated by ``dilations``. Their outputs are concatenated and reduced
    to ``out_features`` channels, followed by ``Dropout2d(0.1)``.
    """

    def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
        super().__init__()

        def pointwise():
            return nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                             bias=False)

        def atrous_branch(rate):
            # padding == dilation keeps the spatial size for a 3x3 kernel.
            return nn.Sequential(
                nn.Conv2d(features, inner_features, kernel_size=3, padding=rate, dilation=rate,
                          bias=False),
                InPlaceABNSync(inner_features))

        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   pointwise(),
                                   InPlaceABNSync(inner_features))
        self.conv2 = nn.Sequential(pointwise(), InPlaceABNSync(inner_features))
        self.conv3 = atrous_branch(dilations[0])
        self.conv4 = atrous_branch(dilations[1])
        self.conv5 = atrous_branch(dilations[2])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Run all five branches on ``x`` and fuse them."""
        _, _, height, width = x.size()
        # The pooled branch is 1x1 spatially and must be stretched back to the input size.
        pooled = F.interpolate(self.conv1(x), size=(height, width), mode='bilinear',
                               align_corners=True)
        branches = (pooled, self.conv2(x), self.conv3(x), self.conv4(x), self.conv5(x))
        return self.bottleneck(torch.cat(branches, 1))
class Edge_Module(nn.Module):
    """
    Edge learning branch.

    Three feature maps are each projected to ``mid_fea`` channels. A shared
    3x3 convolution (``conv4``) turns every projection into an ``out_fea``-channel
    edge map, and a final 1x1 convolution (``conv5``) merges the three edge maps.
    """

    def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2):
        super().__init__()

        def projection(in_channels):
            return nn.Sequential(
                nn.Conv2d(in_channels, mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
                InPlaceABNSync(mid_fea)
            )

        self.conv1 = projection(in_fea[0])
        self.conv2 = projection(in_fea[1])
        self.conv3 = projection(in_fea[2])
        self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True)
        self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, x1, x2, x3):
        """Return ``(edge, edge_fea)``, both at the spatial size of ``x1``."""
        _, _, height, width = x1.size()

        def to_x1_size(tensor):
            return F.interpolate(tensor, size=(height, width), mode='bilinear', align_corners=True)

        fea_a = self.conv1(x1)
        edge_a = self.conv4(fea_a)
        fea_b = self.conv2(x2)
        edge_b = self.conv4(fea_b)
        fea_c = self.conv3(x3)
        edge_c = self.conv4(fea_c)

        # Only the second and third inputs are resized; x1 defines the target size.
        fea_b = to_x1_size(fea_b)
        fea_c = to_x1_size(fea_c)
        edge_b = to_x1_size(edge_b)
        edge_c = to_x1_size(edge_c)

        stacked_edges = torch.cat([edge_a, edge_b, edge_c], dim=1)
        edge_fea = torch.cat([fea_a, fea_b, fea_c], dim=1)
        return self.conv5(stacked_edges), edge_fea
class Decoder_Module(nn.Module):
    """
    Parsing branch decoder.

    Fuses a 512-channel context map with a 256-channel low-level map and
    predicts ``num_classes`` channels with a final 1x1 convolution.
    """

    def __init__(self, num_classes):
        super().__init__()

        def conv_abn(in_channels, out_channels, **extra):
            # 1x1 bias-free convolution followed by InPlaceABNSync.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0, dilation=1,
                          bias=False, **extra),
                InPlaceABNSync(out_channels),
            ]

        self.conv1 = nn.Sequential(*conv_abn(512, 256))
        self.conv2 = nn.Sequential(*conv_abn(256, 48, stride=1))
        # 304 = 256 (projected context) + 48 (projected low-level features).
        self.conv3 = nn.Sequential(*(conv_abn(304, 256) + conv_abn(256, 256)))
        self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, xt, xl):
        """Return ``(seg, x)``: the class map and the 256-channel features it was computed from."""
        _, _, height, width = xl.size()
        context = F.interpolate(self.conv1(xt), size=(height, width), mode='bilinear',
                                align_corners=True)
        low_level = self.conv2(xl)
        fused = self.conv3(torch.cat([context, low_level], dim=1))
        seg = self.conv4(fused)
        return seg, fused
class ResNet(nn.Module):
    """
    Deep-stem ResNet backbone with parsing, edge and fusion heads.

    The stem is three 3x3 convolutions (64, 64, 128 channels) followed by max
    pooling. ``layer4`` keeps stride 1 and uses dilation 2. ``forward`` returns
    ``[[parsing_result, fusion_result], edge_result]``.
    """

    def __init__(self, block, layers, num_classes):
        super().__init__()
        # Channel count feeding the next residual stage; updated by _make_layer.
        self.inplanes = 128

        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=False)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=False)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=False)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1))

        self.context_encoding = PSPModule(2048, 512)
        self.edge = Edge_Module()
        self.decoder = Decoder_Module(num_classes)
        # Attribute name (including its spelling) is part of the state_dict keys.
        self.fushion = nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Dropout2d(0.1),
            nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)
        )

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1):
        """Stack ``blocks`` residual blocks; only the first one gets the stride and projection."""
        out_channels = planes * block.expansion

        def grid_rate(index):
            # A tuple is cycled per block; any other value means a rate of 1.
            if isinstance(multi_grid, tuple):
                return multi_grid[index % len(multi_grid)]
            return 1

        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(out_channels, affine=affine_par))

        stage = [block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
                       multi_grid=grid_rate(0))]
        self.inplanes = out_channels
        for index in range(1, blocks):
            stage.append(
                block(self.inplanes, planes, dilation=dilation, multi_grid=grid_rate(index)))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Compute parsing, fusion and edge predictions for the input batch."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x2 = self.layer1(x)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)

        context = self.context_encoding(x5)
        parsing_result, parsing_fea = self.decoder(context, x2)
        # Edge branch
        edge_result, edge_fea = self.edge(x2, x3, x4)
        # Fusion branch
        fusion_result = self.fushion(torch.cat([parsing_fea, edge_fea], dim=1))
        return [[parsing_result, fusion_result], edge_result]
def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'):
    """
    Attach preprocessing metadata to ``model`` and optionally load backbone weights.

    Args:
        model: module to update in place.
        settings: mapping providing 'input_space', 'input_size', 'input_range',
            'mean' and 'std'; each is copied onto ``model`` as an attribute.
        pretrained: checkpoint path, or ``None`` to skip weight loading.

    Checkpoint entries whose first dotted name component is ``fc`` are ignored.
    Every other entry overrides the matching key of the model's own state dict
    before it is loaded back with ``load_state_dict``.
    """
    for key in ('input_space', 'input_size', 'input_range', 'mean', 'std'):
        setattr(model, key, settings[key])

    if pretrained is None:
        return

    # map_location='cpu' lets a checkpoint saved from GPU tensors load on a CPU-only host;
    # load_state_dict then copies the values into the model's existing parameters.
    # NOTE(review): torch.load unpickles the file -- only point this at trusted checkpoints.
    saved_state_dict = torch.load(pretrained, map_location='cpu')
    new_params = model.state_dict().copy()
    for name, value in saved_state_dict.items():
        # Skip the classifier head.
        if name.split('.')[0] != 'fc':
            new_params[name] = value
    model.load_state_dict(new_params)
def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'):
    """
    Build the ResNet-101 based network (Bottleneck blocks, stage depths 3/4/23/3).

    The ImageNet entry of ``pretrained_settings`` is attached to the model and,
    unless ``pretrained`` is ``None``, weights are loaded from that path.
    """
    stage_depths = [3, 4, 23, 3]
    net = ResNet(Bottleneck, stage_depths, num_classes)
    imagenet_settings = pretrained_settings['resnet101']['imagenet']
    initialize_pretrained_model(net, imagenet_settings, pretrained)
    return net

View File

@@ -0,0 +1,12 @@
from __future__ import absolute_import
from networks.AugmentCE2P import resnet101
# Registry mapping an architecture name to the callable that builds it; consulted by init_model.
__factory = {
'resnet101': resnet101,
}
def init_model(name, *args, **kwargs):
    """
    Build the model registered under ``name``.

    Extra positional and keyword arguments are forwarded to the registered
    builder. Raises ``KeyError`` when ``name`` is not in the registry.
    """
    if name in __factory:
        builder = __factory[name]
        return builder(*args, **kwargs)
    raise KeyError("Unknown model arch: {}".format(name))

View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : mobilenetv2.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch.nn as nn
import math
import functools
from modules import InPlaceABN, InPlaceABNSync
# Batch-norm factory: InPlaceABNSync built with activation='none'.
# NOTE: this is a functools.partial object, not a class, so it cannot be used as the
# second argument of isinstance().
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
# Names exported by `from <module> import *`.
__all__ = ['mobilenetv2']
def conv_bn(inp, oup, stride):
    """Return a 3x3 bias-free conv (padding 1, given stride) -> BatchNorm2d -> ReLU6 stack."""
    conv = nn.Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = BatchNorm2d(oup)
    act = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, act)
def conv_1x1_bn(inp, oup):
    """Return a 1x1 bias-free conv (stride 1, no padding) -> BatchNorm2d -> ReLU6 stack."""
    conv = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)
    norm = BatchNorm2d(oup)
    act = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, act)
class InvertedResidual(nn.Module):
    """
    MobileNetV2 inverted residual block.

    Layout: optional 1x1 expansion (skipped when ``expand_ratio == 1``),
    3x3 depthwise convolution carrying the stride, then a linear 1x1 projection.
    The input is added back only when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]
        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw: expand to the hidden width
            layers.extend([
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ])
        layers.extend([
            # dw: one filter per channel (groups == channels)
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear: project without a trailing activation
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            BatchNorm2d(oup),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the residual path is enabled."""
        transformed = self.conv(x)
        if not self.use_res_connect:
            return transformed
        return x + transformed
class MobileNetV2(nn.Module):
    """
    MobileNetV2 classifier.

    Args:
        n_class: number of outputs of the final linear layer.
        input_size: expected input side length; must be a multiple of 32.
        width_mult: multiplier applied to the channel widths. The last feature
            width (1280) is only scaled when ``width_mult > 1.0``.
    """

    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t (expand ratio), c (output channels), n (repeats), s (stride of first repeat)
            [1, 16, 1, 1],
            [6, 24, 2, 2],  # layer 2
            [6, 32, 3, 2],  # layer 3
            [6, 64, 4, 2],
            [6, 96, 3, 1],  # layer 4
            [6, 160, 3, 2],
            [6, 320, 1, 1],  # layer 5
        ]
        # building first layer
        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        self.features = [conv_bn(3, input_channel, 2)]
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                # Only the first repeat of each group applies the group's stride.
                block_stride = s if i == 0 else 1
                self.features.append(block(input_channel, output_channel, block_stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*self.features)
        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )
        self._initialize_weights()

    def forward(self, x):
        """Extract features, average over both spatial dimensions and classify."""
        x = self.features(x)
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """
        Initialise convolutions (He-style normal), norm layers (weight 1, bias 0)
        and linear layers (normal with std 0.01, bias 0).
        """
        # BatchNorm2d is a functools.partial, which isinstance() rejects with TypeError.
        # Resolve the class it wraps; if no class can be resolved, norm layers are left
        # at their own defaults rather than crashing construction.
        norm_cls = getattr(BatchNorm2d, 'func', BatchNorm2d)
        norm_is_class = isinstance(norm_cls, type)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif norm_is_class and isinstance(m, norm_cls):
                # assumes the norm layer exposes affine `weight` and `bias` -- TODO confirm
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
def mobilenetv2(pretrained=False, **kwargs):
    """Constructs a MobileNet_V2 model with 1000 output classes.

    Args:
        pretrained (bool): If True, load weights from the URL registered under
            ``model_urls['mobilenetv2']`` (non-strict).
        **kwargs: forwarded to ``MobileNetV2``.

    Raises:
        RuntimeError: if ``pretrained`` is True but this module defines no
            ``model_urls['mobilenetv2']`` entry.
    """
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        # This module never imports load_url at the top level, so import it here.
        from torch.utils.model_zoo import load_url
        # No model_urls table is visible in this module; fail with a clear message
        # instead of a bare NameError when it is missing.
        urls = globals().get('model_urls') or {}
        if 'mobilenetv2' not in urls:
            raise RuntimeError(
                "No pretrained weights URL is registered for 'mobilenetv2'; "
                "define model_urls['mobilenetv2'] or call with pretrained=False.")
        model.load_state_dict(load_url(urls['mobilenetv2']), strict=False)
    return model

View File

@@ -0,0 +1,205 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : resnet.py
@Time : 8/4/19 3:35 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import functools
import torch.nn as nn
import math
from torch.utils.model_zoo import load_url
from modules import InPlaceABNSync
# Batch-norm factory: InPlaceABNSync built with activation='none'.
# NOTE: this is a functools.partial object, not a class.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # public API; all three constructors are defined below
# Download locations of the pretrained checkpoints, keyed by the constructor name that loads them.
model_urls = {
'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth',
'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
}
def conv3x3(in_planes, out_planes, stride=1):
    """Create a 3x3 convolution with padding 1, no bias and the requested stride."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 -> 3x3); the first convolution carries the stride."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply both conv/bn stages, add the (optionally projected) input in place, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual bottleneck: 1x1 reduce -> strided 3x3 -> 1x1 expand to ``planes * 4`` channels."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages, add the (optionally projected) input in place, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """
    Deep-stem ResNet classifier.

    The stem is three 3x3 convolutions (64, 64, 128 channels) followed by max
    pooling, then four residual stages, a 7x7 average pool and a linear layer.

    Args:
        block: residual block class exposing an ``expansion`` attribute.
        layers: number of blocks in each of the four stages.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        # Channel count feeding the next residual stage; updated by _make_layer.
        self.inplanes = 128
        super(ResNet, self).__init__()
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # NOTE(review): the fixed 7x7 pool presumably assumes a 7x7 final map (224x224 input) -- confirm.
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # BatchNorm2d is a functools.partial, which isinstance() rejects with TypeError.
        # Resolve the class it wraps; if no class can be resolved, norm layers keep
        # their own defaults rather than crashing construction.
        norm_cls = getattr(BatchNorm2d, 'func', BatchNorm2d)
        norm_is_class = isinstance(norm_cls, type)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif norm_is_class and isinstance(m, norm_cls):
                # assumes the norm layer exposes affine `weight` and `bias` -- TODO confirm
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one gets the stride and projection."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut whenever the spatial size or channel count changes.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier outputs for the input batch."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
def resnet18(pretrained=False, **kwargs):
    """Build a ResNet-18 (BasicBlock, two blocks per stage).

    Args:
        pretrained (bool): If True, download the checkpoint registered under
            ``model_urls['resnet18']`` and load it strictly.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    weights = load_url(model_urls['resnet18'])
    net.load_state_dict(weights)
    return net
def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 (Bottleneck, stage depths 3/4/6/3).

    Args:
        pretrained (bool): If True, download the checkpoint registered under
            ``model_urls['resnet50']`` and load it with ``strict=False``.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = load_url(model_urls['resnet50'])
    net.load_state_dict(weights, strict=False)
    return net
def resnet101(pretrained=False, **kwargs):
    """Build a ResNet-101 (Bottleneck, stage depths 3/4/23/3).

    Args:
        pretrained (bool): If True, download the checkpoint registered under
            ``model_urls['resnet101']`` and load it with ``strict=False``.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    weights = load_url(model_urls['resnet101'])
    net.load_state_dict(weights, strict=False)
    return net

View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : resnext.py
@Time : 8/11/19 8:58 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import functools
import torch.nn as nn
import math
from torch.utils.model_zoo import load_url
from modules import InPlaceABNSync
# Batch-norm factory: InPlaceABNSync built with activation='none'.
# NOTE: this is a functools.partial object, not a class.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
__all__ = ['ResNeXt', 'resnext101'] # support resnext 101
# Download locations of the pretrained checkpoints. Only 'resnext101' is read by the
# constructor visible in this file; 'resnext50' has no matching constructor here.
model_urls = {
'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth',
'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
}
def conv3x3(in_planes, out_planes, stride=1):
    """Construct a padded (padding=1) 3x3 convolution without bias."""
    layer = nn.Conv2d(in_planes, out_planes, kernel_size=3,
                      stride=stride, padding=1, bias=False)
    return layer
class GroupBottleneck(nn.Module):
    """
    ResNeXt bottleneck: 1x1 reduce -> grouped, strided 3x3 -> 1x1 expand to
    ``planes * 2`` channels, with a residual shortcut.
    """
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        super().__init__()
        widened = planes * 2
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages, add the (optionally projected) input in place, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNeXt(nn.Module):
    """
    Deep-stem ResNeXt classifier.

    The stem is three 3x3 convolutions (64, 64, 128 channels) followed by max
    pooling, then four grouped residual stages, a 7x7 average pool and a
    linear layer.

    Args:
        block: residual block class exposing an ``expansion`` attribute.
        layers: number of blocks in each of the four stages.
        groups: group count passed to every block's 3x3 convolution.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, layers, groups=32, num_classes=1000):
        # Channel count feeding the next residual stage; updated by _make_layer.
        self.inplanes = 128
        super(ResNeXt, self).__init__()
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 128, layers[0], groups=groups)
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups)
        # NOTE(review): the fixed 7x7 pool presumably assumes a 7x7 final map (224x224 input) -- confirm.
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        # BatchNorm2d is a functools.partial, which isinstance() rejects with TypeError.
        # Resolve the class it wraps; if no class can be resolved, norm layers keep
        # their own defaults rather than crashing construction.
        norm_cls = getattr(BatchNorm2d, 'func', BatchNorm2d)
        norm_is_class = isinstance(norm_cls, type)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Fan-out is divided by the group count for grouped convolutions.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif norm_is_class and isinstance(m, norm_cls):
                # assumes the norm layer exposes affine `weight` and `bias` -- TODO confirm
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        """Stack ``blocks`` grouped blocks; only the first one gets the stride and projection."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut whenever the spatial size or channel count changes.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, groups, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=groups))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier outputs for the input batch."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
def resnext101(pretrained=False, **kwargs):
    """Build a ResNeXt-101 (GroupBottleneck, stage depths 3/4/23/3).

    Args:
        pretrained (bool): If True, download the checkpoint registered under
            ``model_urls['resnext101']`` and load it with ``strict=False``.
    """
    net = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    weights = load_url(model_urls['resnext101'])
    net.load_state_dict(weights, strict=False)
    return net

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : aspp.py
@Time : 8/4/19 3:36 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch
import torch.nn as nn
from torch.nn import functional as F
from modules import InPlaceABNSync
class ASPPModule(nn.Module):
    """
    Atrous spatial pyramid pooling with five parallel branches.

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*

    Branches: global-average-pooled 1x1 conv, plain 1x1 conv, and three 3x3
    convs dilated by ``dilations``. Their outputs are concatenated and reduced
    to ``out_features`` channels, followed by ``Dropout2d(0.1)``.
    """

    def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)):
        super().__init__()

        def pointwise():
            return nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
                             bias=False)

        def atrous_branch(rate):
            # padding == dilation keeps the spatial size for a 3x3 kernel.
            return nn.Sequential(
                nn.Conv2d(features, inner_features, kernel_size=3, padding=rate, dilation=rate,
                          bias=False),
                InPlaceABNSync(inner_features))

        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                   pointwise(),
                                   InPlaceABNSync(inner_features))
        self.conv2 = nn.Sequential(pointwise(), InPlaceABNSync(inner_features))
        self.conv3 = atrous_branch(dilations[0])
        self.conv4 = atrous_branch(dilations[1])
        self.conv5 = atrous_branch(dilations[2])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Run all five branches on ``x`` and fuse them."""
        _, _, height, width = x.size()
        # The pooled branch is 1x1 spatially and must be stretched back to the input size.
        pooled = F.interpolate(self.conv1(x), size=(height, width), mode='bilinear',
                               align_corners=True)
        branches = (pooled, self.conv2(x), self.conv3(x), self.conv4(x), self.conv5(x))
        return self.bottleneck(torch.cat(branches, 1))

View File

@@ -0,0 +1,226 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : ocnet.py
@Time : 8/4/19 3:36 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import functools
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
from modules import InPlaceABNSync
# Batch-norm factory: InPlaceABNSync built with activation='none'.
# NOTE: this is a functools.partial object, not a class.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
class _SelfAttentionBlock(nn.Module):
    '''
    The basic implementation for self-attention block/non-local block
    Input:
        N X C X H X W
    Parameters:
        in_channels       : the dimension of the input feature map
        key_channels      : the dimension after the key/query transform
        value_channels    : the dimension after the value transform
        out_channels      : the dimension of the output; defaults to in_channels
        scale             : choose the scale to downsample the input feature maps (save memory cost)
    Return:
        N X C X H X W
        position-aware context features.(w/o concate or add with the input)
    '''

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(_SelfAttentionBlock, self).__init__()
        self.scale = scale
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.key_channels = key_channels
        self.value_channels = value_channels
        if out_channels is None:
            self.out_channels = in_channels
        self.pool = nn.MaxPool2d(kernel_size=(scale, scale))
        self.f_key = nn.Sequential(
            nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels,
                      kernel_size=1, stride=1, padding=0),
            InPlaceABNSync(self.key_channels),
        )
        # Query and key share one transform (the same module object).
        self.f_query = self.f_key
        self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels,
                                 kernel_size=1, stride=1, padding=0)
        self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels,
                           kernel_size=1, stride=1, padding=0)
        # Zero-initialise the output projection so the block starts out emitting zeros.
        # (nn.init.constant_ is the non-deprecated spelling of nn.init.constant.)
        nn.init.constant_(self.W.weight, 0)
        nn.init.constant_(self.W.bias, 0)

    def forward(self, x):
        """Return the attention-weighted context for ``x`` at the input's spatial size."""
        # Remember the original size before any pooling so the output can be restored to it.
        batch_size, h, w = x.size(0), x.size(2), x.size(3)
        if self.scale > 1:
            x = self.pool(x)

        # Flatten the spatial dimensions into one axis of positions.
        value = self.f_value(x).view(batch_size, self.value_channels, -1)
        value = value.permute(0, 2, 1)
        query = self.f_query(x).view(batch_size, self.key_channels, -1)
        query = query.permute(0, 2, 1)
        key = self.f_key(x).view(batch_size, self.key_channels, -1)

        # Position-by-position similarity, scaled by 1/sqrt(key_channels), softmax over keys.
        sim_map = torch.matmul(query, key)
        sim_map = (self.key_channels ** -.5) * sim_map
        sim_map = F.softmax(sim_map, dim=-1)

        context = torch.matmul(sim_map, value)
        context = context.permute(0, 2, 1).contiguous()
        context = context.view(batch_size, self.value_channels, *x.size()[2:])
        context = self.W(context)
        if self.scale > 1:
            # F.interpolate replaces the deprecated F.upsample with the same arguments.
            context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=True)
        return context
class SelfAttentionBlock2D(_SelfAttentionBlock):
    """2D self-attention block; forwards all constructor arguments unchanged to the base class."""

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super().__init__(in_channels=in_channels,
                         key_channels=key_channels,
                         value_channels=value_channels,
                         out_channels=out_channels,
                         scale=scale)
class BaseOC_Module(nn.Module):
    """
    Base object-context module.

    Parameters:
        in_channels / out_channels: channels of the input / output feature maps.
        key_channels / value_channels: widths used inside each attention stage.
        dropout: ratio of the final ``Dropout2d``.
        sizes: one attention stage is built per entry; each entry is that stage's scale.
    Return:
        the summed stage outputs concatenated with the input, then passed
        through a 1x1 conv, InPlaceABNSync and dropout.
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super().__init__()
        attention_stages = []
        for size in sizes:
            attention_stages.append(
                self._make_stage(in_channels, out_channels, key_channels, value_channels, size))
        self.stages = nn.ModuleList(attention_stages)
        # NOTE(review): the conv expects 2 * in_channels inputs, which matches the concat in
        # forward only when out_channels == in_channels -- confirm with callers.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
            nn.Dropout2d(dropout)
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        """Create one attention stage operating at scale ``size``."""
        return SelfAttentionBlock2D(in_channels, key_channels, value_channels, output_channels, size)

    def forward(self, feats):
        """Sum the stage outputs, concatenate with ``feats`` and fuse."""
        stage_outputs = [stage(feats) for stage in self.stages]
        context = stage_outputs[0]
        # Accumulate the remaining stages into the first output in place.
        for extra in stage_outputs[1:]:
            context += extra
        return self.conv_bn_dropout(torch.cat([context, feats], 1))
class BaseOC_Context_Module(nn.Module):
    """
    Output only the context features (no concatenation with the input).

    Parameters:
        in_channels / out_channels: channels of the input / output feature maps.
        key_channels / value_channels: widths used inside each attention stage.
        dropout: accepted for signature parity; no dropout layer is built here.
        sizes: one attention stage is built per entry; each entry is that stage's scale.
    Return:
        the summed stage outputs passed through a 1x1 conv and InPlaceABNSync.
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super().__init__()
        attention_stages = []
        for size in sizes:
            attention_stages.append(
                self._make_stage(in_channels, out_channels, key_channels, value_channels, size))
        self.stages = nn.ModuleList(attention_stages)
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        """Create one attention stage operating at scale ``size``."""
        return SelfAttentionBlock2D(in_channels, key_channels, value_channels, output_channels, size)

    def forward(self, feats):
        """Sum the stage outputs and project them."""
        stage_outputs = [stage(feats) for stage in self.stages]
        context = stage_outputs[0]
        # Accumulate the remaining stages into the first output in place.
        for extra in stage_outputs[1:]:
            context += extra
        return self.conv_bn_dropout(context)
class ASP_OC_Module(nn.Module):
    """
    Five parallel branches over the same input, concatenated along dim 1 and
    projected back to ``out_features`` channels.
    Branches (attribute names are unchanged so existing checkpoints still load):
        context: 3x3 conv -> InPlaceABNSync -> BaseOC_Context_Module
        conv2:   1x1 conv -> InPlaceABNSync
        conv3/conv4/conv5: 3x3 conv with dilation ``dilations[0..2]`` -> InPlaceABNSync
    Parameters:
        features: channels of the input feature map.
        out_features: channels produced by every branch and by the module.
        dilations: three dilation rates, one per dilated branch.
    Return:
        output of ``conv_bn_dropout`` applied to the concatenated branches.
    """

    def __init__(self, features, out_features=256, dilations=(12, 24, 36)):
        super(ASP_OC_Module, self).__init__()

        def dilated_branch(rate):
            # padding == dilation keeps the spatial size for a 3x3 kernel.
            return nn.Sequential(
                nn.Conv2d(features, out_features, kernel_size=3, padding=rate, dilation=rate, bias=False),
                InPlaceABNSync(out_features))

        # Registration order is preserved (context, conv2..conv5, conv_bn_dropout).
        self.context = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True),
            InPlaceABNSync(out_features),
            BaseOC_Context_Module(in_channels=out_features, out_channels=out_features,
                                  key_channels=out_features // 2, value_channels=out_features,
                                  dropout=0, sizes=[2]))
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features))
        self.conv3 = dilated_branch(dilations[0])
        self.conv4 = dilated_branch(dilations[1])
        self.conv5 = dilated_branch(dilations[2])
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def _cat_each(self, feat1, feat2, feat3, feat4, feat5):
        """
        Concatenate five equally long sequences element-wise along dim 1.
        Returns a list whose i-th entry is cat(feat1[i], ..., feat5[i]).
        Raises ValueError when the sequences differ in length (previously only
        feat1/feat2 were compared, and only via ``assert``).
        """
        groups = (feat1, feat2, feat3, feat4, feat5)
        lengths = [len(group) for group in groups]
        if len(set(lengths)) != 1:
            raise ValueError('feature lists must have equal length, got {}'.format(lengths))
        return [torch.cat(parts, 1) for parts in zip(*groups)]

    def forward(self, x):
        """
        Run all five branches on ``x`` and fuse them.
        Raises:
            RuntimeError: if ``x`` is neither a tensor nor a tuple/list.
            ValueError: if the tensor (or the first sequence element) is not 4-D.
        """
        # torch.Tensor replaces the deprecated autograd.Variable check; no
        # extra import is needed because torch is already used in this class.
        if isinstance(x, torch.Tensor):
            is_sequence = False
            probe = x
        elif isinstance(x, (tuple, list)):
            # NOTE(review): the branches built in __init__ start with
            # nn.Conv2d, which presumably rejects a tuple/list input; confirm
            # whether this path is still reachable before relying on it.
            is_sequence = True
            probe = x[0]
        else:
            raise RuntimeError('unknown input type')
        # The original unpacked `_, _, h, w = x.size()`, which doubled as a
        # 4-D check; keep that guard without the unused locals.
        if len(probe.size()) != 4:
            raise ValueError('expected a 4-D input, got {} dimension(s)'.format(len(probe.size())))

        branches = (self.context, self.conv2, self.conv3, self.conv4, self.conv5)
        feats = [branch(x) for branch in branches]
        if is_sequence:
            out = self._cat_each(*feats)
        else:
            out = torch.cat(feats, 1)
        return self.conv_bn_dropout(out)

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : psp.py
@Time : 8/4/19 3:36 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch
import torch.nn as nn
from torch.nn import functional as F
from modules import InPlaceABNSync
class PSPModule(nn.Module):
    """
    Pyramid pooling head.
    Each stage average-pools the input to a ``size x size`` grid, applies a
    1x1 conv and InPlaceABNSync; ``forward`` resizes every stage output back to
    the input resolution, concatenates them with the input along dim 1 and
    runs a 3x3 bottleneck conv.
    Parameters:
        features: channels of the input feature map.
        out_features: channels produced by each stage and by the bottleneck.
        sizes: pooled grid sizes, one stage per entry.
    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super(PSPModule, self).__init__()
        pyramid = nn.ModuleList()
        for grid in sizes:
            pyramid.append(self._make_stage(features, out_features, grid))
        self.stages = pyramid
        # Input channels of the bottleneck: the raw features plus one
        # out_features-wide map per pyramid level.
        fused_channels = features + len(sizes) * out_features
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_channels, out_features, kernel_size=3, padding=1, dilation=1, bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        """Return pool(size x size) -> 1x1 conv -> InPlaceABNSync as one nn.Sequential."""
        return nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=(size, size)),
            nn.Conv2d(features, out_features, kernel_size=1, bias=False),
            InPlaceABNSync(out_features),
        )

    def forward(self, feats):
        """Fuse ``feats`` with its pyramid-pooled versions and return the bottleneck output."""
        target = (feats.size(2), feats.size(3))
        pieces = []
        for stage in self.stages:
            pooled = stage(feats)
            # Bring every pyramid level back to the input's spatial size.
            pieces.append(F.interpolate(input=pooled, size=target, mode='bilinear', align_corners=True))
        pieces.append(feats)
        return self.bottleneck(torch.cat(pieces, 1))