Add at new repo again
This commit is contained in:
388
vton-api/preprocess/humanparsing/networks/AugmentCE2P.py
Normal file
388
vton-api/preprocess/humanparsing/networks/AugmentCE2P.py
Normal file
@@ -0,0 +1,388 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : AugmentCE2P.py
|
||||
@Time : 8/4/19 3:35 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import pdb
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
|
||||
# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
|
||||
from modules import InPlaceABNSync
|
||||
import numpy as np
|
||||
|
||||
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
||||
|
||||
affine_par = True
|
||||
|
||||
pretrained_settings = {
|
||||
'resnet101': {
|
||||
'imagenet': {
|
||||
'input_space': 'BGR',
|
||||
'input_size': [3, 224, 224],
|
||||
'input_range': [0, 1],
|
||||
'mean': [0.406, 0.456, 0.485],
|
||||
'std': [0.225, 0.224, 0.229],
|
||||
'num_classes': 1000
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with a padding of 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the convolution (defaults to 1).
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Residual bottleneck: 1x1 conv -> 3x3 (dilated) conv -> 1x1 conv, plus a skip path.

    The last 1x1 convolution produces ``planes * 4`` channels, matching the
    class-level ``expansion`` factor used by the layer builder.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1):
        """Create the three conv/norm pairs and the activations.

        Args:
            inplanes: channels of the incoming tensor.
            planes: channels of the two inner convolutions.
            stride: stride of the 3x3 convolution.
            dilation: base dilation of the 3x3 convolution.
            downsample: optional module applied to the input to form the skip path.
            fist_dilation: accepted for signature compatibility; not used in this block.
            multi_grid: multiplier applied to ``dilation`` for the 3x3 convolution.
        """
        super(Bottleneck, self).__init__()
        # Padding equals the effective dilation so the 3x3 conv keeps the
        # spatial size when stride is 1.
        effective_dilation = dilation * multi_grid

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=effective_dilation,
            dilation=effective_dilation,
            bias=False,
        )
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=False)
        self.relu_inplace = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride

    def forward(self, x):
        """Run the main branch, add the (optionally downsampled) input, apply ReLU."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.relu(self.bn2(self.conv2(main)))
        main = self.bn3(self.conv3(main))

        # The skip path is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        return self.relu_inplace(main + shortcut)
|
||||
|
||||
|
||||
class CostomAdaptiveAvgPool2D(nn.Module):
    """Adaptive average pooling built from explicit slices and ``F.avg_pool2d``.

    Each output cell ``(i, j)`` is the average of the input window
    ``[floor(i * H_in / H_out), ceil((i + 1) * H_in / H_out))`` by the
    equivalent column window; the cells are concatenated back into a
    ``(..., H_out, W_out)`` tensor.

    Args:
        output_size: ``(H_out, W_out)`` pair, or a single int for a square output.
    """

    def __init__(self, output_size):
        super(CostomAdaptiveAvgPool2D, self).__init__()
        self.output_size = output_size

    @staticmethod
    def _window(index, in_size, out_size):
        """Return the half-open ``(start, end)`` input range of output cell ``index``."""
        # Pure integer arithmetic: floor for the start, ceil for the end.
        start = (index * in_size) // out_size
        end = -((-(index + 1) * in_size) // out_size)
        return start, end

    def forward(self, x):
        """Pool the last two dimensions of a 4-D tensor ``x`` down to ``output_size``."""
        H_in, W_in = (int(dim) for dim in x.shape[-2:])

        size = self.output_size
        if isinstance(size, int):
            H_out = W_out = size
        else:
            H_out, W_out = size

        # Column windows do not depend on the row, so compute them once.
        col_windows = [self._window(j, W_in, W_out) for j in range(W_out)]

        rows = []
        for i in range(H_out):
            hs, he = self._window(i, H_in, H_out)
            cells = [
                # A kernel covering the whole slice yields exactly one value per channel.
                F.avg_pool2d(x[:, :, hs:he, ws:we], [he - hs, we - ws])
                for ws, we in col_windows
            ]
            rows.append(torch.cat(cells, -1))

        return torch.cat(rows, -2)
|
||||
|
||||
|
||||
class PSPModule(nn.Module):
    """Pyramid pooling module.

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*

    The input is pooled to each grid size in ``sizes``, projected with a 1x1
    convolution, resized back to the input resolution and concatenated with
    the input before a 3x3 fusion convolution.
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        """Create one pooling stage per entry of ``sizes`` and the fusion conv.

        Args:
            features: channels of the input feature map.
            out_features: channels produced by every stage and by the fusion conv.
            sizes: output grid sizes of the pooling stages.
        """
        super(PSPModule, self).__init__()

        stage_list = []
        for size in sizes:
            # Grid sizes 3 and 6 use the slice-based pooling implementation,
            # every other size uses nn.AdaptiveAvgPool2d.
            build = self._make_stage_custom if size in (3, 6) else self._make_stage
            stage_list.append(build(features, out_features, size))
        self.stages = nn.ModuleList(stage_list)

        fused_channels = features + len(sizes) * out_features
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_channels, out_features, kernel_size=3, padding=1, dilation=1, bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        """Stage made of ``nn.AdaptiveAvgPool2d`` -> 1x1 conv -> InPlaceABNSync."""
        pool = nn.AdaptiveAvgPool2d(output_size=(size, size))
        project = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        norm = InPlaceABNSync(out_features)
        return nn.Sequential(pool, project, norm)

    def _make_stage_custom(self, features, out_features, size):
        """Stage made of ``CostomAdaptiveAvgPool2D`` -> 1x1 conv -> InPlaceABNSync."""
        pool = CostomAdaptiveAvgPool2D(output_size=(size, size))
        project = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
        norm = InPlaceABNSync(out_features)
        return nn.Sequential(pool, project, norm)

    def forward(self, feats):
        """Return the fused pyramid features at the resolution of ``feats``."""
        target_size = (feats.size(2), feats.size(3))

        pyramid = []
        for stage in self.stages:
            pooled = stage(feats)
            pyramid.append(
                F.interpolate(input=pooled, size=target_size, mode='bilinear', align_corners=True))
        # The untouched input is appended last, after all pooled branches.
        pyramid.append(feats)

        return self.bottleneck(torch.cat(pyramid, 1))
|
||||
|
||||
|
||||
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling.

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*

    Five parallel branches (global pooling, 1x1 conv and three dilated 3x3
    convs) are concatenated and fused by a 1x1 convolution followed by
    ``Dropout2d(0.1)``.
    """

    def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
        """Build the five branches and the fusion bottleneck.

        Args:
            features: channels of the input feature map.
            inner_features: channels produced by each branch.
            out_features: channels produced by the fusion bottleneck.
            dilations: the three dilation rates of the 3x3 branches.
        """
        super(ASPPModule, self).__init__()

        def conv_norm(kernel_size, padding, dilation):
            # Bias-free convolution followed by InPlaceABNSync.
            return [
                nn.Conv2d(features, inner_features, kernel_size=kernel_size, padding=padding,
                          dilation=dilation, bias=False),
                InPlaceABNSync(inner_features),
            ]

        # Image-level branch: global average pooling before the 1x1 conv.
        self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), *conv_norm(1, 0, 1))
        self.conv2 = nn.Sequential(*conv_norm(1, 0, 1))
        # Dilated branches pad by their dilation rate.
        self.conv3 = nn.Sequential(*conv_norm(3, dilations[0], dilations[0]))
        self.conv4 = nn.Sequential(*conv_norm(3, dilations[1], dilations[1]))
        self.conv5 = nn.Sequential(*conv_norm(3, dilations[2], dilations[2]))

        self.bottleneck = nn.Sequential(
            nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1),
        )

    def forward(self, x):
        """Concatenate the five branch outputs along channels and fuse them."""
        _, _, height, width = x.size()

        # The pooled branch is 1x1 spatially and is resized to the input size.
        pooled = F.interpolate(self.conv1(x), size=(height, width), mode='bilinear', align_corners=True)
        branches = (pooled, self.conv2(x), self.conv3(x), self.conv4(x), self.conv5(x))

        return self.bottleneck(torch.cat(branches, 1))
|
||||
|
||||
|
||||
class Edge_Module(nn.Module):
    """
    Edge Learning Branch

    Projects three feature maps to ``mid_fea`` channels, predicts an edge map
    from each with one shared 3x3 convolution, and fuses the three edge maps
    with a 1x1 convolution.
    """

    def __init__(self, in_fea=(256, 512, 1024), mid_fea=256, out_fea=2):
        """Create the per-scale projections and the edge classifiers.

        Args:
            in_fea: channel counts of the three input feature maps (indexable
                sequence of length three; an immutable tuple by default).
            mid_fea: channels of each projected feature map.
            out_fea: channels of every edge prediction.
        """
        super(Edge_Module, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_fea[1], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(mid_fea)
        )
        # conv4 is shared by all three scales; conv5 fuses their predictions.
        self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True)
        self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, x1, x2, x3):
        """Return ``(edge, edge_fea)`` at the spatial size of ``x1``.

        ``edge`` has ``out_fea`` channels; ``edge_fea`` is the channel-wise
        concatenation of the three projected feature maps.
        """
        _, _, h, w = x1.size()

        edge1_fea = self.conv1(x1)
        edge1 = self.conv4(edge1_fea)
        edge2_fea = self.conv2(x2)
        edge2 = self.conv4(edge2_fea)
        edge3_fea = self.conv3(x3)
        edge3 = self.conv4(edge3_fea)

        # Bring the second and third scale to the resolution of x1.
        edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True)
        edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True)
        edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True)

        edge = torch.cat([edge1, edge2, edge3], dim=1)
        edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1)
        edge = self.conv5(edge)

        return edge, edge_fea
|
||||
|
||||
|
||||
class Decoder_Module(nn.Module):
    """
    Parsing Branch Decoder Module.

    Merges a 512-channel context feature map with a 256-channel low-level
    feature map and predicts ``num_classes`` segmentation logits.
    """

    def __init__(self, num_classes):
        """Create the projections, the fusion stack and the classifier.

        Args:
            num_classes: output channels of the final 1x1 classifier.
        """
        super(Decoder_Module, self).__init__()

        def conv1x1_norm(in_channels, out_channels, **extra):
            # Bias-free 1x1 convolution followed by InPlaceABNSync.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0, dilation=1, bias=False, **extra),
                InPlaceABNSync(out_channels),
            ]

        self.conv1 = nn.Sequential(*conv1x1_norm(512, 256))
        self.conv2 = nn.Sequential(*conv1x1_norm(256, 48, stride=1))
        # 304 = 256 (projected context) + 48 (projected low-level features).
        self.conv3 = nn.Sequential(*(conv1x1_norm(304, 256) + conv1x1_norm(256, 256)))

        self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)

    def forward(self, xt, xl):
        """Return ``(seg, x)``: the class logits and the 256-channel fused features."""
        _, _, height, width = xl.size()

        # The context features are resized to the low-level resolution.
        context = F.interpolate(self.conv1(xt), size=(height, width), mode='bilinear', align_corners=True)
        low_level = self.conv2(xl)

        fused = self.conv3(torch.cat([context, low_level], dim=1))
        return self.conv4(fused), fused
|
||||
|
||||
|
||||
class ResNet(nn.Module):
    """ResNet backbone with parsing, edge and fusion heads.

    The stem is three 3x3 convolutions followed by max pooling; four residual
    stages feed a ``PSPModule`` context encoder, a ``Decoder_Module`` parsing
    head, an ``Edge_Module`` edge head and a fusion head.
    """

    def __init__(self, block, layers, num_classes):
        """Assemble the backbone and the three heads.

        Args:
            block: residual block class exposing an ``expansion`` attribute.
            layers: number of blocks in each of the four residual stages.
            num_classes: number of parsing classes predicted by the heads.
        """
        super(ResNet, self).__init__()
        # Channel count entering layer1; updated by _make_layer as stages are built.
        self.inplanes = 128

        # Stem.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=False)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=False)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=False)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; the last one keeps its resolution and uses dilation 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1))

        self.context_encoding = PSPModule(2048, 512)

        self.edge = Edge_Module()
        self.decoder = Decoder_Module(num_classes)

        # 1024 input channels = 256 parsing features + 3 * 256 edge features.
        self.fushion = nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(256),
            nn.Dropout2d(0.1),
            nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True),
        )

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1):
        """Stack ``blocks`` instances of ``block`` into one residual stage.

        Only the first block receives ``stride`` and the downsample module.
        ``multi_grid`` is cycled over the blocks when it is a tuple; any other
        value results in a multiplier of 1 for every block.
        """
        stage_channels = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != stage_channels:
            # Project the skip path when the shape of the first block changes.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, stage_channels, kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(stage_channels, affine=affine_par))

        def grid_for(index):
            if isinstance(multi_grid, tuple):
                return multi_grid[index % len(multi_grid)]
            return 1

        stage = [block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
                       multi_grid=grid_for(0))]
        self.inplanes = stage_channels
        for index in range(1, blocks):
            stage.append(block(self.inplanes, planes, dilation=dilation, multi_grid=grid_for(index)))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``[[parsing_result, fusion_result], edge_result]`` for input ``x``."""
        stem = self.relu1(self.bn1(self.conv1(x)))
        stem = self.relu2(self.bn2(self.conv2(stem)))
        stem = self.relu3(self.bn3(self.conv3(stem)))
        stem = self.maxpool(stem)

        x2 = self.layer1(stem)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        x5 = self.layer4(x4)

        # Parsing branch: context encoding decoded against the layer1 features.
        context = self.context_encoding(x5)
        parsing_result, parsing_fea = self.decoder(context, x2)
        # Edge Branch
        edge_result, edge_fea = self.edge(x2, x3, x4)
        # Fusion Branch
        fusion_result = self.fushion(torch.cat([parsing_fea, edge_fea], dim=1))

        return [[parsing_result, fusion_result], edge_result]
|
||||
|
||||
|
||||
def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'):
    """Attach preprocessing metadata to ``model`` and optionally load weights.

    Args:
        model: module to configure; must provide ``state_dict`` and
            ``load_state_dict``.
        settings: mapping with the keys ``input_space``, ``input_size``,
            ``input_range``, ``mean`` and ``std``; each value is copied onto
            ``model`` as an attribute of the same name.
        pretrained: path of a checkpoint holding a state dict, or ``None`` to
            skip weight loading.

    Every checkpoint entry whose first dotted component is not ``fc``
    overrides the matching entry of the model's own state dict before the
    merged dict is loaded (strictly) back into the model.
    """
    for attribute in ('input_space', 'input_size', 'input_range', 'mean', 'std'):
        setattr(model, attribute, settings[attribute])

    if pretrained is None:
        return

    # map_location='cpu' lets checkpoints saved from a GPU load on CPU-only
    # hosts; load_state_dict copies the values onto the model's own device.
    # NOTE(security): torch.load unpickles the file - only load trusted checkpoints.
    saved_state_dict = torch.load(pretrained, map_location='cpu')

    new_params = model.state_dict().copy()
    for key, value in saved_state_dict.items():
        # Skip the classifier ("fc.*") entries of the checkpoint.
        if key.split('.')[0] != 'fc':
            new_params[key] = value
    model.load_state_dict(new_params)
|
||||
|
||||
|
||||
def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'):
    """Build the [3, 4, 23, 3] Bottleneck network and initialise it.

    Args:
        num_classes: number of classes predicted by the network heads.
        pretrained: checkpoint path forwarded to
            ``initialize_pretrained_model``; ``None`` skips weight loading.

    Returns:
        The constructed ``ResNet`` carrying the ImageNet preprocessing
        settings of ``pretrained_settings['resnet101']['imagenet']``.
    """
    imagenet_settings = pretrained_settings['resnet101']['imagenet']
    net = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
    initialize_pretrained_model(net, imagenet_settings, pretrained)
    return net
|
12
vton-api/preprocess/humanparsing/networks/__init__.py
Normal file
12
vton-api/preprocess/humanparsing/networks/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from __future__ import absolute_import
|
||||
from networks.AugmentCE2P import resnet101
|
||||
|
||||
__factory = {
|
||||
'resnet101': resnet101,
|
||||
}
|
||||
|
||||
|
||||
def init_model(name, *args, **kwargs):
    """Instantiate the architecture registered under ``name``.

    Args:
        name: key of the model constructor in the module-level factory table.
        *args, **kwargs: forwarded unchanged to the constructor.

    Raises:
        KeyError: if ``name`` is not a registered architecture.
    """
    if name in __factory:
        return __factory[name](*args, **kwargs)
    raise KeyError("Unknown model arch: {}".format(name))
|
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : mobilenetv2.py
|
||||
@Time : 8/4/19 3:35 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import torch.nn as nn
|
||||
import math
|
||||
import functools
|
||||
|
||||
from modules import InPlaceABN, InPlaceABNSync
|
||||
|
||||
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
||||
|
||||
__all__ = ['mobilenetv2']
|
||||
|
||||
|
||||
def conv_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm2d -> ReLU6 block.

    Args:
        inp: input channels.
        oup: output channels.
        stride: stride of the convolution.
    """
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)
|
||||
|
||||
|
||||
def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (stride 1, no bias) -> BatchNorm2d -> ReLU6 block.

    Args:
        inp: input channels.
        oup: output channels.
    """
    conv = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
    norm = BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise conv, linear 1x1 projection."""

    def __init__(self, inp, oup, stride, expand_ratio):
        """Build the convolution stack.

        Args:
            inp: input channels.
            oup: output channels.
            stride: stride of the depthwise convolution; must be 1 or 2.
            expand_ratio: multiplier giving the hidden width
                ``round(inp * expand_ratio)``; a ratio of 1 omits the
                expansion convolution.
        """
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        # The identity shortcut is only valid when the shape is preserved.
        self.use_res_connect = self.stride == 1 and inp == oup

        stack = []
        if expand_ratio != 1:
            # pw: pointwise expansion to the hidden width
            stack += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        stack += [
            # dw: depthwise 3x3 convolution (one group per channel)
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear: projection without an activation
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the stack, adding the input back when the shortcut is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier built from ``InvertedResidual`` blocks."""

    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        """Build the feature extractor and the classifier, then initialise weights.

        Args:
            n_class: number of outputs of the final linear layer.
            input_size: expected input resolution; must be a multiple of 32.
            width_mult: multiplier applied to the channel counts of every
                stage (the last channel count is only scaled when it is > 1).
        """
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        interverted_residual_setting = [
            # t (expand ratio), c (channels), n (repeats), s (first stride)
            [1, 16, 1, 1],
            [6, 24, 2, 2],  # layer 2
            [6, 32, 3, 2],  # layer 3
            [6, 64, 4, 2],
            [6, 96, 3, 1],  # layer 4
            [6, 160, 3, 2],
            [6, 320, 1, 1],  # layer 5
        ]

        # building first layer
        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        features = [conv_bn(3, input_channel, 2)]
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                # Only the first block of a stage applies the stage stride.
                block_stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, block_stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        """Extract features, average over both spatial dimensions and classify."""
        x = self.features(x)
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise convolution, normalisation and linear layers in place."""
        # BatchNorm2d is a functools.partial in this module; isinstance() needs
        # the wrapped class, so unwrap ``.func`` when it is present.
        norm_type = getattr(BatchNorm2d, 'func', BatchNorm2d)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, norm_type):
                # NOTE(review): assumes the norm layer has affine weight/bias - confirm.
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
|
||||
|
||||
|
||||
def mobilenetv2(pretrained=False, **kwargs):
    """Constructs a MobileNet_V2 model.

    Args:
        pretrained (bool): If True, loads the weights registered under
            ``model_urls['mobilenetv2']`` (non-strictly).
        **kwargs: forwarded to ``MobileNetV2`` (``n_class`` is fixed to 1000).

    Raises:
        RuntimeError: if ``pretrained`` is true but this module defines no
            ``model_urls['mobilenetv2']`` entry.
    """
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        # Imported here because the module does not import load_url at file level.
        from torch.utils.model_zoo import load_url

        # NOTE(review): no ``model_urls`` table is visible in this module;
        # fail with an explicit message instead of a NameError until one is added.
        urls = globals().get('model_urls') or {}
        if 'mobilenetv2' not in urls:
            raise RuntimeError(
                "No pretrained weights URL is registered for 'mobilenetv2'; "
                "define model_urls['mobilenetv2'] or call with pretrained=False.")
        model.load_state_dict(load_url(urls['mobilenetv2']), strict=False)
    return model
|
205
vton-api/preprocess/humanparsing/networks/backbone/resnet.py
Normal file
205
vton-api/preprocess/humanparsing/networks/backbone/resnet.py
Normal file
@@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : resnet.py
|
||||
@Time : 8/4/19 3:35 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import torch.nn as nn
|
||||
import math
|
||||
from torch.utils.model_zoo import load_url
|
||||
|
||||
from modules import InPlaceABNSync
|
||||
|
||||
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
||||
|
||||
__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon!
|
||||
|
||||
model_urls = {
|
||||
'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth',
|
||||
'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
|
||||
'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
    """Create a 3x3 convolution with padding 1 and no bias term.

    Args:
        in_planes: input channel count.
        out_planes: output channel count.
        stride: convolution stride, 1 unless given.
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
    """Two-convolution residual block whose output width equals ``planes``."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """Create the two 3x3 conv/norm pairs.

        Args:
            inplanes: input channels.
            planes: output channels of both convolutions.
            stride: stride of the first convolution.
            downsample: optional module applied to the input for the skip path.
        """
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip path: the raw input, or its projection when a downsample exists.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) producing ``planes * 4`` channels."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """Create the conv/norm pairs.

        Args:
            inplanes: input channels.
            planes: channels of the two inner convolutions.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the input for the skip path.
        """
        super(Bottleneck, self).__init__()
        widened = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Skip path: the raw input, or its projection when a downsample exists.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)
|
||||
|
||||
|
||||
class ResNet(nn.Module):
    """ResNet classifier with a three-convolution stem."""

    def __init__(self, block, layers, num_classes=1000):
        """Build the stem, four residual stages, pooling and classifier.

        Args:
            block: residual block class exposing an ``expansion`` attribute.
            layers: number of blocks in each of the four stages.
            num_classes: outputs of the final linear layer.
        """
        super(ResNet, self).__init__()
        # Channel count entering layer1; updated by _make_layer.
        self.inplanes = 128
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # BatchNorm2d is a functools.partial in this module; isinstance() needs
        # the wrapped class, so unwrap ``.func`` when it is present.
        norm_type = getattr(BatchNorm2d, 'func', BatchNorm2d)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, norm_type):
                # NOTE(review): assumes the norm layer has affine weight/bias - confirm.
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block``; only the first gets ``stride``."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the skip path when the first block changes the shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for input ``x``."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
|
||||
|
||||
|
||||
def resnet18(pretrained=False, **kwargs):
    """Build a ResNet with ``BasicBlock`` and a [2, 2, 2, 2] stage layout.

    Args:
        pretrained (bool): If True, download ``model_urls['resnet18']`` and
            load it into the model (strict key matching).
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(load_url(model_urls['resnet18']))
    return net
|
||||
|
||||
|
||||
def resnet50(pretrained=False, **kwargs):
    """Build a ResNet with ``Bottleneck`` and a [3, 4, 6, 3] stage layout.

    Args:
        pretrained (bool): If True, download ``model_urls['resnet50']`` and
            load it into the model with ``strict=False``.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(load_url(model_urls['resnet50']), strict=False)
    return net
|
||||
|
||||
|
||||
def resnet101(pretrained=False, **kwargs):
    """Build a ResNet with ``Bottleneck`` and a [3, 4, 23, 3] stage layout.

    Args:
        pretrained (bool): If True, download ``model_urls['resnet101']`` and
            load it into the model with ``strict=False``.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(load_url(model_urls['resnet101']), strict=False)
    return net
|
149
vton-api/preprocess/humanparsing/networks/backbone/resnext.py
Normal file
149
vton-api/preprocess/humanparsing/networks/backbone/resnext.py
Normal file
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : resnext.py
|
||||
@Time : 8/11/19 8:58 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
import functools
|
||||
import torch.nn as nn
|
||||
import math
|
||||
from torch.utils.model_zoo import load_url
|
||||
|
||||
from modules import InPlaceABNSync
|
||||
|
||||
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
||||
|
||||
__all__ = ['ResNeXt', 'resnext101'] # support resnext 101
|
||||
|
||||
model_urls = {
|
||||
'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth',
|
||||
'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
    """Construct a 3x3, padding-1 convolution without a bias parameter.

    Args:
        in_planes: channels going in.
        out_planes: channels coming out.
        stride: stride of the convolution (default 1).
    """
    kernel_size, padding = 3, 1
    return nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
                     stride=stride, padding=padding, bias=False)
|
||||
|
||||
|
||||
class GroupBottleneck(nn.Module):
    """Bottleneck block with a grouped 3x3 convolution, producing ``planes * 2`` channels."""

    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        """Create the conv/norm pairs.

        Args:
            inplanes: input channels.
            planes: channels of the two inner convolutions.
            stride: stride of the grouped 3x3 convolution.
            groups: number of groups of the 3x3 convolution.
            downsample: optional module applied to the input for the skip path.
        """
        super(GroupBottleneck, self).__init__()
        widened = planes * 2

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Skip path: the raw input, or its projection when a downsample exists.
        identity = x if self.downsample is None else self.downsample(x)

        out += identity
        return self.relu(out)
|
||||
|
||||
|
||||
class ResNeXt(nn.Module):
    """ResNeXt classifier with a three-convolution stem and grouped bottlenecks."""

    def __init__(self, block, layers, groups=32, num_classes=1000):
        """Build the stem, four residual stages, pooling and classifier.

        Args:
            block: residual block class exposing an ``expansion`` attribute
                and accepting a ``groups`` argument.
            layers: number of blocks in each of the four stages.
            groups: group count passed to every block.
            num_classes: outputs of the final linear layer.
        """
        super(ResNeXt, self).__init__()
        # Channel count entering layer1; updated by _make_layer.
        self.inplanes = 128
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = BatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = BatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 128, layers[0], groups=groups)
        self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups)
        self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups)
        self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        # BatchNorm2d is a functools.partial in this module; isinstance() needs
        # the wrapped class, so unwrap ``.func`` when it is present.
        norm_type = getattr(BatchNorm2d, 'func', BatchNorm2d)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # The output channel count is divided by the group count.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, norm_type):
                # NOTE(review): assumes the norm layer has affine weight/bias - confirm.
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        """Stack ``blocks`` instances of ``block``; only the first gets ``stride``."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the skip path when the first block changes the shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, groups, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=groups))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for input ``x``."""
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
|
||||
|
||||
|
||||
def resnext101(pretrained=False, **kwargs):
    """Build a ResNeXt-101 model (GroupBottleneck blocks, stage depths 3/4/23/3).

    Args:
        pretrained (bool): If True, load the weights published under
            ``model_urls['resnext101']`` (non-strict, so missing or unexpected
            keys are tolerated).
            NOTE(review): the original docstring said these weights are
            pre-trained on Places - confirm against the URL.
        **kwargs: forwarded unchanged to the ``ResNeXt`` constructor.
    """
    net = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net

    state = load_url(model_urls['resnext101'])
    net.load_state_dict(state, strict=False)
    return net
|
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : aspp.py
|
||||
@Time : 8/4/19 3:36 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from modules import InPlaceABNSync
|
||||
|
||||
|
||||
class ASPPModule(nn.Module):
    """
    Atrous spatial pyramid pooling head with five parallel branches.

    Branches: pooled image-level features (adaptive average pool to 1x1 then
    a 1x1 conv), a plain 1x1 conv, and three 3x3 convs whose dilation and
    padding are taken from ``dilations``. The branch outputs are concatenated
    along the channel axis and fused by ``bottleneck``
    (1x1 conv, InPlaceABNSync, Dropout2d(0.1)).

    Reference:
        Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*

    Parameters:
        features       : channels of the input feature map
        out_features   : channels produced by the bottleneck
        inner_features : channels produced by each branch
        dilations      : three dilation rates for conv3 / conv4 / conv5
    """

    def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)):
        super(ASPPModule, self).__init__()

        def atrous_branch(rate):
            # 3x3 conv whose padding equals its dilation, followed by the norm layer.
            return nn.Sequential(
                nn.Conv2d(features, inner_features, kernel_size=3, padding=rate, dilation=rate, bias=False),
                InPlaceABNSync(inner_features))

        # Image-level branch: pooled to a single spatial location.
        image_level = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features),
        ]
        self.conv1 = nn.Sequential(*image_level)

        # Point-wise branch.
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(inner_features))

        # Dilated branches.
        self.conv3 = atrous_branch(dilations[0])
        self.conv4 = atrous_branch(dilations[1])
        self.conv5 = atrous_branch(dilations[2])

        fused_width = inner_features * 5
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_width, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Apply all five branches to ``x`` and fuse them with the bottleneck."""
        _, _, rows, cols = x.size()

        # The pooled branch is 1x1 spatially; stretch it back to the input size.
        pooled = self.conv1(x)
        branches = (
            F.interpolate(pooled, size=(rows, cols), mode='bilinear', align_corners=True),
            self.conv2(x),
            self.conv3(x),
            self.conv4(x),
            self.conv5(x),
        )

        return self.bottleneck(torch.cat(branches, 1))
|
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : ocnet.py
|
||||
@Time : 8/4/19 3:36 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import functools
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.autograd import Variable
|
||||
from torch.nn import functional as F
|
||||
|
||||
from modules import InPlaceABNSync
|
||||
# InPlaceABNSync pre-bound with activation='none'.
# NOTE(review): none of the classes visible in this module reference this alias - confirm whether it is still needed.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
|
||||
|
||||
|
||||
class _SelfAttentionBlock(nn.Module):
|
||||
'''
|
||||
The basic implementation for self-attention block/non-local block
|
||||
Input:
|
||||
N X C X H X W
|
||||
Parameters:
|
||||
in_channels : the dimension of the input feature map
|
||||
key_channels : the dimension after the key/query transform
|
||||
value_channels : the dimension after the value transform
|
||||
scale : choose the scale to downsample the input feature maps (save memory cost)
|
||||
Return:
|
||||
N X C X H X W
|
||||
position-aware context features.(w/o concate or add with the input)
|
||||
'''
|
||||
|
||||
def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
|
||||
super(_SelfAttentionBlock, self).__init__()
|
||||
self.scale = scale
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.key_channels = key_channels
|
||||
self.value_channels = value_channels
|
||||
if out_channels == None:
|
||||
self.out_channels = in_channels
|
||||
self.pool = nn.MaxPool2d(kernel_size=(scale, scale))
|
||||
self.f_key = nn.Sequential(
|
||||
nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels,
|
||||
kernel_size=1, stride=1, padding=0),
|
||||
InPlaceABNSync(self.key_channels),
|
||||
)
|
||||
self.f_query = self.f_key
|
||||
self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels,
|
||||
kernel_size=1, stride=1, padding=0)
|
||||
self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels,
|
||||
kernel_size=1, stride=1, padding=0)
|
||||
nn.init.constant(self.W.weight, 0)
|
||||
nn.init.constant(self.W.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
batch_size, h, w = x.size(0), x.size(2), x.size(3)
|
||||
if self.scale > 1:
|
||||
x = self.pool(x)
|
||||
|
||||
value = self.f_value(x).view(batch_size, self.value_channels, -1)
|
||||
value = value.permute(0, 2, 1)
|
||||
query = self.f_query(x).view(batch_size, self.key_channels, -1)
|
||||
query = query.permute(0, 2, 1)
|
||||
key = self.f_key(x).view(batch_size, self.key_channels, -1)
|
||||
|
||||
sim_map = torch.matmul(query, key)
|
||||
sim_map = (self.key_channels ** -.5) * sim_map
|
||||
sim_map = F.softmax(sim_map, dim=-1)
|
||||
|
||||
context = torch.matmul(sim_map, value)
|
||||
context = context.permute(0, 2, 1).contiguous()
|
||||
context = context.view(batch_size, self.value_channels, *x.size()[2:])
|
||||
context = self.W(context)
|
||||
if self.scale > 1:
|
||||
context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True)
|
||||
return context
|
||||
|
||||
|
||||
class SelfAttentionBlock2D(_SelfAttentionBlock):
    """Thin subclass of ``_SelfAttentionBlock``; forwards every constructor
    argument to the base class and adds no behaviour of its own."""

    def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1):
        super(SelfAttentionBlock2D, self).__init__(
            in_channels=in_channels,
            key_channels=key_channels,
            value_channels=value_channels,
            out_channels=out_channels,
            scale=scale,
        )
|
||||
|
||||
|
||||
class BaseOC_Module(nn.Module):
    """
    Implementation of the BaseOC module
    Parameters:
        in_channels / out_channels: the channels of the input / output feature maps.
        key_channels / value_channels: forwarded to every SelfAttentionBlock2D stage.
        dropout: probability passed to the final Dropout2d.
        sizes: one attention stage is built per entry, the entry being used as the
            stage's ``scale``. You can apply multiple sizes. Here we only use one size.
    Return:
        features fused with Object context information.
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Module, self).__init__()
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        # forward() concatenates the stage output (out_channels) with the input
        # (in_channels). The original hard-coded 2 * in_channels, which is the same
        # number when in_channels == out_channels and a shape mismatch otherwise.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_channels + in_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
            nn.Dropout2d(dropout)
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        """Build one attention stage; ``size`` becomes the block's ``scale``."""
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        """Sum the outputs of all stages, concatenate with ``feats`` and fuse."""
        priors = [stage(feats) for stage in self.stages]
        context = priors[0]
        for prior in priors[1:]:
            context += prior
        output = self.conv_bn_dropout(torch.cat([context, feats], 1))
        return output
|
||||
|
||||
|
||||
class BaseOC_Context_Module(nn.Module):
    """
    Output only the context features.
    Parameters:
        in_channels / out_channels: the channels of the input / output feature maps.
        key_channels / value_channels: forwarded to every SelfAttentionBlock2D stage.
        dropout: accepted for signature compatibility; this module does not use it.
        sizes: one attention stage is built per entry, the entry being used as the
            stage's ``scale``. We find that directly learn the attention weights on
            even 1/8 feature maps is hard.
    Return:
        the summed stage outputs after a 1x1 conv and InPlaceABNSync
        (the input features are not concatenated or added here).
    """

    def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])):
        super(BaseOC_Context_Module, self).__init__()
        self.stages = nn.ModuleList(
            [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes])
        # The projection consumes the stage output, which has out_channels channels.
        # The original sized it with in_channels, which only works when both are equal.
        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=1, padding=0),
            InPlaceABNSync(out_channels),
        )

    def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size):
        """Build one attention stage; ``size`` becomes the block's ``scale``."""
        return SelfAttentionBlock2D(in_channels,
                                    key_channels,
                                    value_channels,
                                    output_channels,
                                    size)

    def forward(self, feats):
        """Sum the outputs of all stages and project them."""
        priors = [stage(feats) for stage in self.stages]
        context = priors[0]
        for prior in priors[1:]:
            context += prior
        output = self.conv_bn_dropout(context)
        return output
|
||||
|
||||
|
||||
class ASP_OC_Module(nn.Module):
    """
    Five parallel branches fused by a 1x1 conv.

    Branches: an object-context branch (3x3 conv, InPlaceABNSync, then a
    BaseOC_Context_Module built with sizes=[2]), a 1x1 conv, and three 3x3
    convs whose dilation and padding come from ``dilations``. The branch
    outputs are concatenated on the channel axis and passed through
    ``conv_bn_dropout`` (1x1 conv, InPlaceABNSync, Dropout2d(0.1)).

    Parameters:
        features     : channels of the input feature map
        out_features : channels of every branch and of the fused output
        dilations    : three dilation rates for conv3 / conv4 / conv5
    """

    def __init__(self, features, out_features=256, dilations=(12, 24, 36)):
        super(ASP_OC_Module, self).__init__()

        def atrous_branch(rate):
            # 3x3 conv whose padding equals its dilation, followed by the norm layer.
            return nn.Sequential(
                nn.Conv2d(features, out_features, kernel_size=3, padding=rate, dilation=rate, bias=False),
                InPlaceABNSync(out_features))

        context_layers = [
            nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True),
            InPlaceABNSync(out_features),
            BaseOC_Context_Module(in_channels=out_features, out_channels=out_features,
                                  key_channels=out_features // 2, value_channels=out_features,
                                  dropout=0, sizes=([2])),
        ]
        self.context = nn.Sequential(*context_layers)
        self.conv2 = nn.Sequential(
            nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features))
        self.conv3 = atrous_branch(dilations[0])
        self.conv4 = atrous_branch(dilations[1])
        self.conv5 = atrous_branch(dilations[2])

        self.conv_bn_dropout = nn.Sequential(
            nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
            InPlaceABNSync(out_features),
            nn.Dropout2d(0.1)
        )

    def _cat_each(self, feat1, feat2, feat3, feat4, feat5):
        """Concatenate the five inputs element by element along dim 1."""
        assert (len(feat1) == len(feat2))
        return [
            torch.cat((feat1[k], feat2[k], feat3[k], feat4[k], feat5[k]), 1)
            for k in range(len(feat1))
        ]

    def forward(self, x):
        """Run all branches on ``x`` and fuse them.

        Raises:
            RuntimeError: if ``x`` is neither a tensor/Variable nor a tuple/list.
        """
        is_tensor = isinstance(x, Variable)
        is_sequence = isinstance(x, (tuple, list))

        # The unpacking doubles as a check that the input has four dimensions.
        if is_tensor:
            _, _, h, w = x.size()
        elif is_sequence:
            _, _, h, w = x[0].size()
        else:
            raise RuntimeError('unknown input type')

        # NOTE(review): for tuple/list input the sequence itself is handed to the
        # branches below - confirm whether that path is still reachable.
        branches = [
            self.context(x),
            self.conv2(x),
            self.conv3(x),
            self.conv4(x),
            self.conv5(x),
        ]

        if is_tensor:
            out = torch.cat(branches, 1)
        else:
            out = self._cat_each(*branches)
        return self.conv_bn_dropout(out)
|
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : psp.py
|
||||
@Time : 8/4/19 3:36 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from modules import InPlaceABNSync
|
||||
|
||||
|
||||
class PSPModule(nn.Module):
    """
    Pyramid pooling head: the input is average-pooled to each grid size in
    ``sizes``, projected by a 1x1 conv, resized back to the input resolution
    and concatenated with the input before a 3x3 bottleneck conv.

    Reference:
        Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*

    Parameters:
        features     : channels of the input feature map
        out_features : channels of every pyramid level and of the output
        sizes        : output grid sizes of the adaptive average pools
    """

    def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
        super(PSPModule, self).__init__()

        pyramid = [self._make_stage(features, out_features, size) for size in sizes]
        self.stages = nn.ModuleList(pyramid)

        # Input features plus one out_features-wide map per pyramid level.
        fused_channels = features + len(sizes) * out_features
        self.bottleneck = nn.Sequential(
            nn.Conv2d(fused_channels, out_features, kernel_size=3, padding=1, dilation=1,
                      bias=False),
            InPlaceABNSync(out_features),
        )

    def _make_stage(self, features, out_features, size):
        """Build one level: adaptive average pool to ``size`` x ``size``, 1x1 conv, norm."""
        return nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=(size, size)),
            nn.Conv2d(features, out_features, kernel_size=1, bias=False),
            InPlaceABNSync(out_features),
        )

    def forward(self, feats):
        """Fuse all pyramid levels with ``feats`` and apply the bottleneck."""
        target = (feats.size(2), feats.size(3))
        pyramid = [
            F.interpolate(input=level(feats), size=target, mode='bilinear', align_corners=True)
            for level in self.stages
        ]
        # The untouched input goes last in the concatenation.
        pyramid.append(feats)
        return self.bottleneck(torch.cat(pyramid, 1))
|
Reference in New Issue
Block a user