Add at new repo again
This commit is contained in:
0
vton-api/preprocess/humanparsing/utils/__init__.py
Normal file
0
vton-api/preprocess/humanparsing/utils/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
33
vton-api/preprocess/humanparsing/utils/consistency_loss.py
Normal file
33
vton-api/preprocess/humanparsing/utils/consistency_loss.py
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : consistency_loss.py
|
||||
@Time : 7/23/19 4:02 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from datasets.target_generation import generate_edge_tensor
|
||||
|
||||
|
||||
class ConsistencyLoss(nn.Module):
    """Consistency term between the parsing prediction and the edge prediction.

    An edge map is derived from the arg-max parsing prediction with
    ``generate_edge_tensor`` and compared, via smooth L1, with the arg-max
    edge prediction on the pixels where both maps are equal to 1.
    """

    def __init__(self, ignore_index=255):
        """
        Args:
            ignore_index: label value marking pixels that are excluded.
        """
        super(ConsistencyLoss, self).__init__()
        self.ignore_index = ignore_index

    def forward(self, parsing, edge, label):
        """Compute the consistency loss.

        Args:
            parsing: parsing scores; classes are on dim 1.
            edge: edge scores; classes are on dim 1.
            label: parsing labels used to mask ignored pixels
                (assumed to have the shape of the arg-max maps -- TODO confirm).

        Returns:
            The smooth L1 loss between the generated and the predicted edge
            values on the jointly positive pixels.
        """
        parsing_pre = torch.argmax(parsing, dim=1)
        # Propagate ignored pixels into the prediction before deriving edges.
        parsing_pre[label == self.ignore_index] = self.ignore_index
        generated_edge = generate_edge_tensor(parsing_pre)
        edge_pre = torch.argmax(edge, dim=1)

        # Use the configured ignore value (previously hard-coded to 255).
        valid = label != self.ignore_index
        v_generate_edge = generated_edge[valid]
        # Cast without forcing CUDA; follow the device of the generated edges.
        v_edge_pre = edge_pre[valid].float().to(v_generate_edge.device)

        # Only the positive values count.
        positive_union = (v_generate_edge == 1) & (v_edge_pre == 1)
        # NOTE(review): both operands are restricted to entries equal to 1, so
        # the result looks like it is always 0 (or NaN when nothing is
        # selected) -- confirm whether this term is meant to contribute.
        return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0),
                                v_edge_pre[positive_union].squeeze(0))
|
142
vton-api/preprocess/humanparsing/utils/criterion.py
Normal file
142
vton-api/preprocess/humanparsing/utils/criterion.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : criterion.py
|
||||
@Time : 8/30/19 8:59 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import torch.nn as nn
|
||||
import torch
|
||||
import numpy as np
|
||||
from torch.nn import functional as F
|
||||
from .lovasz_softmax import LovaszSoftmax
|
||||
from .kl_loss import KLDivergenceLoss
|
||||
from .consistency_loss import ConsistencyLoss
|
||||
|
||||
NUM_CLASSES = 20
|
||||
|
||||
|
||||
class CriterionAll(nn.Module):
    """Combined training loss: parsing loss + edge loss + consistency term.

    The three parts are weighted by ``lambda_1``, ``lambda_2`` and
    ``lambda_3`` respectively.
    """

    def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1,
                 num_classes=20):
        """
        Args:
            use_class_weight: stored on the instance; not read by this class.
            ignore_index: label value excluded from every loss term.
            lambda_1: weight of the parsing (segmentation) loss.
            lambda_2: weight of the edge loss.
            lambda_3: weight of the consistency regularization.
            num_classes: number of parsing classes used for one-hot encoding.
        """
        super(CriterionAll, self).__init__()
        self.ignore_index = ignore_index
        self.use_class_weight = use_class_weight
        self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)
        self.lovasz = LovaszSoftmax(ignore_index=ignore_index)
        self.kldiv = KLDivergenceLoss(ignore_index=ignore_index)
        self.reg = ConsistencyLoss(ignore_index=ignore_index)
        # The attribute names keep their historical "lamda" spelling so that
        # external code reading them keeps working.
        self.lamda_1 = lambda_1
        self.lamda_2 = lambda_2
        self.lamda_3 = lambda_3
        self.num_classes = num_classes

    def parsing_loss(self, preds, target, cycle_n=None):
        """
        Loss function definition.

        Args:
            preds: [[parsing result1, parsing result2, ...], [edge result, ...]]
            target: indexable with four entries:
                target[0] parsing label, target[1] edge label,
                target[2] soft parsing prediction or None,
                target[3] soft edge prediction or None.
            cycle_n: cycle counter; must be a number whenever a soft
                prediction is given, because the one-hot labels are mixed in
                with weight ``1 / (cycle_n + 1)``.
        Returns:
            Calculated Loss.
        """
        h, w = target[0].size(1), target[0].size(2)

        pos_num = torch.sum(target[1] == 1, dtype=torch.float)
        neg_num = torch.sum(target[1] == 0, dtype=torch.float)

        # Each edge class is weighted by the frequency of the other class.
        weight_pos = neg_num / (pos_num + neg_num)
        weight_neg = pos_num / (pos_num + neg_num)
        weights = torch.stack([weight_neg, weight_pos])  # edge loss weight

        loss = 0

        # loss for segmentation
        preds_parsing = preds[0]
        for pred_parsing in preds_parsing:
            scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
                                       mode='bilinear', align_corners=True)

            loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0])
            if target[2] is None:
                loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0])
            else:
                soft_scale_pred = F.interpolate(input=target[2], size=(h, w),
                                                mode='bilinear', align_corners=True)
                soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes),
                                                 1.0 / (cycle_n + 1.0))
                loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0])

        # loss for edge
        preds_edge = preds[1]
        for pred_edge in preds_edge:
            scale_pred = F.interpolate(input=pred_edge, size=(h, w),
                                       mode='bilinear', align_corners=True)
            if target[3] is None:
                # Place the class weights on the prediction's device instead
                # of unconditionally moving them to CUDA.
                loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1],
                                                       weights.to(scale_pred.device),
                                                       ignore_index=self.ignore_index)
            else:
                soft_scale_edge = F.interpolate(input=target[3], size=(h, w),
                                                mode='bilinear', align_corners=True)
                soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2),
                                                 1.0 / (cycle_n + 1.0))
                # NOTE(review): the ignore mask comes from the parsing label
                # (target[0]), not the edge label -- confirm this is intended.
                loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0])

        # consistency regularization
        scale_edge = None
        for pred_parsing in preds_parsing:
            scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
                                       mode='bilinear', align_corners=True)
            if scale_edge is None:
                # Only the first edge prediction is used; resize it once.
                scale_edge = F.interpolate(input=preds_edge[0], size=(h, w),
                                           mode='bilinear', align_corners=True)
            loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0])

        return loss

    def forward(self, preds, target, cycle_n=None):
        """Return ``parsing_loss(preds, target, cycle_n)``."""
        loss = self.parsing_loss(preds, target, cycle_n)
        return loss

    def _generate_weights(self, masks, num_classes):
        """Compute inverse-frequency class weights.

        Args:
            masks: torch.Tensor with shape [B, H, W]
            num_classes: number of class ids (0 .. num_classes - 1) to count.

        Returns:
            A float64 numpy array of length ``num_classes`` whose entry ``i``
            is ``(total - count_i) / total / (num_classes - 1)``, where
            ``total`` only counts pixels labelled with one of the classes.
        """
        masks_label = masks.data.cpu().numpy().astype(np.int64)
        # np.float was removed from NumPy; float64 is the type it aliased.
        pixel_nums = np.array([np.sum(masks_label == i) for i in range(num_classes)],
                              dtype=np.float64)
        tot_pixels = pixel_nums.sum()
        weights = (tot_pixels - pixel_nums) / tot_pixels / (num_classes - 1)
        return weights
|
||||
|
||||
|
||||
def moving_average(target1, target2, alpha=1.0):
    """Blend two values as ``(1 - alpha) * target1 + alpha * target2``.

    Args:
        target1: value weighted by ``1 - alpha``.
        target2: value weighted by ``alpha``.
        alpha: mixing factor; the default of 1.0 keeps only ``target2``.

    Returns:
        The blended value.
    """
    keep = 1.0 - alpha
    blended = 0 + keep * target1
    blended += target2 * alpha
    return blended
|
||||
|
||||
|
||||
def to_one_hot(tensor, num_cls, dim=1, ignore_index=255):
    """One-hot encode a label map.

    Args:
        tensor: integer label tensor of shape [B, H, W].
        num_cls: number of channels of the one-hot output.
        dim: axis along which the class channel is inserted and scattered.
        ignore_index: label value that is encoded as class 0.

    Returns:
        A float tensor of zeros of shape [B, num_cls, H, W], on the same
        device as ``tensor``, with ones scattered along ``dim``.
    """
    b, h, w = tensor.shape
    # Work on a copy: the caller's labels must keep their ignore markers
    # because they are still used as masks after this call.
    index = tensor.masked_fill(tensor == ignore_index, 0).long()
    # Allocate on the input's device rather than forcing CUDA.
    onehot_tensor = torch.zeros(b, num_cls, h, w, device=tensor.device)
    onehot_tensor.scatter_(dim, index.unsqueeze(dim), 1)
    return onehot_tensor
|
188
vton-api/preprocess/humanparsing/utils/encoding.py
Normal file
188
vton-api/preprocess/humanparsing/utils/encoding.py
Normal file
@@ -0,0 +1,188 @@
|
||||
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
## Created by: Hang Zhang
|
||||
## ECE Department, Rutgers University
|
||||
## Email: zhang.hang@rutgers.edu
|
||||
## Copyright (c) 2017
|
||||
##
|
||||
## This source code is licensed under the MIT-style license found in the
|
||||
## LICENSE file in the root directory of this source tree
|
||||
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
"""Encoding Data Parallel"""
|
||||
import threading
|
||||
import functools
|
||||
import torch
|
||||
from torch.autograd import Variable, Function
|
||||
import torch.cuda.comm as comm
|
||||
from torch.nn.parallel.data_parallel import DataParallel
|
||||
from torch.nn.parallel.parallel_apply import get_a_var
|
||||
from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
|
||||
|
||||
torch_ver = torch.__version__[:3]
|
||||
|
||||
__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback']
|
||||
|
||||
def allreduce(*inputs):
    """Cross-GPU all-reduce autograd operation.

    Thin wrapper that forwards every argument to ``AllReduce.apply``; used
    to compute mean and variance in SyncBN.
    """
    apply_all_reduce = AllReduce.apply
    return apply_all_reduce(*inputs)
|
||||
|
||||
class AllReduce(Function):
    """Autograd function that sums groups of tensors and broadcasts the sums.

    ``inputs`` is a flat sequence made of consecutive groups of
    ``num_inputs`` tensors; the device of each group is taken from its
    first tensor.
    """

    @staticmethod
    def forward(ctx, num_inputs, *inputs):
        ctx.num_inputs = num_inputs
        group_starts = range(0, len(inputs), num_inputs)
        ctx.target_gpus = [inputs[start].get_device() for start in group_starts]
        grouped = [inputs[start:start + num_inputs] for start in group_starts]
        # sort before reduce sum
        grouped = sorted(grouped, key=lambda group: group[0].get_device())
        reduced = comm.reduce_add_coalesced(grouped, ctx.target_gpus[0])
        broadcasted = comm.broadcast_coalesced(reduced, ctx.target_gpus)
        return tuple(t for per_device in broadcasted for t in per_device)

    @staticmethod
    def backward(ctx, *inputs):
        grads = [grad.data for grad in inputs]
        group_size = ctx.num_inputs
        grouped = [grads[start:start + group_size]
                   for start in range(0, len(grads), group_size)]
        reduced = comm.reduce_add_coalesced(grouped, ctx.target_gpus[0])
        broadcasted = comm.broadcast_coalesced(reduced, ctx.target_gpus)
        # The leading None is the gradient slot of ``num_inputs``.
        return (None,) + tuple(Variable(t) for per_device in broadcasted for t in per_device)
|
||||
|
||||
class Reduce(Function):
    """Autograd function that sums its inputs with ``comm.reduce_add``.

    The device of every input is remembered so that the backward pass can
    broadcast the incoming gradient back to those devices.
    """

    @staticmethod
    def forward(ctx, *inputs):
        ctx.target_gpus = [tensor.get_device() for tensor in inputs]
        ordered = sorted(inputs, key=lambda tensor: tensor.get_device())
        return comm.reduce_add(ordered)

    @staticmethod
    def backward(ctx, gradOutput):
        return Broadcast.apply(ctx.target_gpus, gradOutput)
|
||||
|
||||
|
||||
class DataParallelModel(DataParallel):
    """Module-level data parallelism that leaves the outputs un-gathered.

    Behaves like :class:`torch.nn.parallel.DataParallel`, except that
    ``gather`` hands back the per-device outputs untouched. Pair it with
    :class:`DataParallelCriterion`, which consumes such per-device outputs.

    The batch size should be larger than the number of GPUs used, and an
    integer multiple of it, so that every GPU processes the same number of
    samples.

    Args:
        module: module to be parallelized
        device_ids: CUDA devices (default: all devices)

    Reference:
        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang,
        Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic
        Segmentation." *The IEEE Conference on Computer Vision and Pattern
        Recognition (CVPR) 2018*

    Example::

        >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
        >>> y = net(x)
    """

    def gather(self, outputs, output_device):
        # Intentionally a no-op: the per-device outputs are returned as-is.
        return outputs

    def replicate(self, module, device_ids):
        # Plain delegation to the parent implementation.
        return super(DataParallelModel, self).replicate(module, device_ids)
|
||||
|
||||
|
||||
class DataParallelCriterion(DataParallel):
    """Compute a loss on several GPUs to balance memory usage.

    The inputs are expected to be already split per device (for example
    the output of :class:`DataParallelModel`); only the targets and keyword
    arguments are scattered here.

    Reference:
        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang,
        Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic
        Segmentation." *The IEEE Conference on Computer Vision and Pattern
        Recognition (CVPR) 2018*

    Example::

        >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
        >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2])
        >>> y = net(x)
        >>> loss = criterion(y, target)
    """

    def forward(self, inputs, *targets, **kwargs):
        device_ids = self.device_ids
        # Without devices, fall back to a direct call of the wrapped module.
        if not device_ids:
            return self.module(inputs, *targets, **kwargs)

        # The inputs are already scattered; scatter the targets instead.
        scattered_targets, scattered_kwargs = self.scatter(targets, kwargs, device_ids)
        if len(device_ids) == 1:
            return self.module(inputs, *scattered_targets[0], **scattered_kwargs[0])

        replicas = self.replicate(self.module, device_ids[:len(inputs)])
        losses = _criterion_parallel_apply(replicas, inputs, scattered_targets, scattered_kwargs)
        # Average the per-device losses.
        return Reduce.apply(*losses) / len(losses)
|
||||
|
||||
|
||||
def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None):
|
||||
assert len(modules) == len(inputs)
|
||||
assert len(targets) == len(inputs)
|
||||
if kwargs_tup:
|
||||
assert len(modules) == len(kwargs_tup)
|
||||
else:
|
||||
kwargs_tup = ({},) * len(modules)
|
||||
if devices is not None:
|
||||
assert len(modules) == len(devices)
|
||||
else:
|
||||
devices = [None] * len(modules)
|
||||
|
||||
lock = threading.Lock()
|
||||
results = {}
|
||||
if torch_ver != "0.3":
|
||||
grad_enabled = torch.is_grad_enabled()
|
||||
|
||||
def _worker(i, module, input, target, kwargs, device=None):
|
||||
if torch_ver != "0.3":
|
||||
torch.set_grad_enabled(grad_enabled)
|
||||
if device is None:
|
||||
device = get_a_var(input).get_device()
|
||||
try:
|
||||
if not isinstance(input, tuple):
|
||||
input = (input,)
|
||||
with torch.cuda.device(device):
|
||||
output = module(*(input + target), **kwargs)
|
||||
with lock:
|
||||
results[i] = output
|
||||
except Exception as e:
|
||||
with lock:
|
||||
results[i] = e
|
||||
|
||||
if len(modules) > 1:
|
||||
threads = [threading.Thread(target=_worker,
|
||||
args=(i, module, input, target,
|
||||
kwargs, device),)
|
||||
for i, (module, input, target, kwargs, device) in
|
||||
enumerate(zip(modules, inputs, targets, kwargs_tup, devices))]
|
||||
|
||||
for thread in threads:
|
||||
thread.start()
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
else:
|
||||
_worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0])
|
||||
|
||||
outputs = []
|
||||
for i in range(len(inputs)):
|
||||
output = results[i]
|
||||
if isinstance(output, Exception):
|
||||
raise output
|
||||
outputs.append(output)
|
||||
return outputs
|
44
vton-api/preprocess/humanparsing/utils/kl_loss.py
Normal file
44
vton-api/preprocess/humanparsing/utils/kl_loss.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : kl_loss.py
|
||||
@Time : 7/23/19 4:02 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
|
||||
def flatten_probas(input, target, labels, ignore=255):
    """
    Flattens predictions in the batch.

    Args:
        input: tensor of shape [B, C, H, W].
        target: tensor with the same layout as ``input``.
        labels: label tensor with B * H * W elements, used only to find the
            ignored pixels.
        ignore: label value to drop, or None to keep every pixel.

    Returns:
        ``(input, target)`` reshaped to [P, C], restricted to the pixels whose
        label differs from ``ignore``.
    """
    B, C, H, W = input.size()
    input = input.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    target = target.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    labels = labels.view(-1)
    if ignore is None:
        return input, target
    valid = (labels != ignore)
    # Index with the boolean mask directly: nonzero().squeeze() collapses to a
    # 0-dim index when exactly one pixel is valid, which drops the pixel axis.
    vinput = input[valid]
    vtarget = target[valid]
    return vinput, vtarget
|
||||
|
||||
|
||||
class KLDivergenceLoss(nn.Module):
    """KL divergence between temperature-scaled softmax distributions."""

    def __init__(self, ignore_index=255, T=1):
        """
        Args:
            ignore_index: label value of the pixels left out of the loss.
            T: temperature dividing both inputs before the softmax.
        """
        super(KLDivergenceLoss, self).__init__()
        self.ignore_index = ignore_index
        self.T = T

    def forward(self, input, target, label):
        """Return ``T * T * kl_div`` over the non-ignored pixels.

        Both ``input`` and ``target`` are divided by ``T`` and normalised
        over dim 1 (log-softmax and softmax respectively) before being
        flattened with ``flatten_probas``.
        """
        temperature = self.T
        log_input_prob = F.log_softmax(input / temperature, dim=1)
        target_prob = F.softmax(target / temperature, dim=1)
        flat_input, flat_target = flatten_probas(
            log_input_prob, target_prob, label, ignore=self.ignore_index)
        loss = F.kl_div(flat_input, flat_target)
        return self.T * self.T * loss  # balanced
|
279
vton-api/preprocess/humanparsing/utils/lovasz_softmax.py
Normal file
279
vton-api/preprocess/humanparsing/utils/lovasz_softmax.py
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : lovasz_softmax.py
|
||||
@Time : 8/30/19 7:12 PM
|
||||
@Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch
|
||||
Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
from __future__ import print_function, division
|
||||
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
from torch import nn
|
||||
|
||||
try:
|
||||
from itertools import ifilterfalse
|
||||
except ImportError: # py3k
|
||||
from itertools import filterfalse as ifilterfalse
|
||||
|
||||
|
||||
def lovasz_grad(gt_sorted):
    """
    Computes gradient of the Lovasz extension w.r.t sorted errors
    See Alg. 1 in paper

    Args:
        gt_sorted: 1-D ground-truth tensor, ordered like the sorted errors.

    Returns:
        A tensor with one entry per element of ``gt_sorted``.
    """
    n_pixels = len(gt_sorted)
    total_fg = gt_sorted.sum()
    running_fg = gt_sorted.float().cumsum(0)
    running_bg = (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - (total_fg - running_fg) / (total_fg + running_bg)
    if n_pixels > 1:  # cover 1-pixel case
        # Turn the cumulative Jaccard values into successive differences.
        jaccard[1:n_pixels] = jaccard[1:n_pixels] - jaccard[0:-1]
    return jaccard
|
||||
|
||||
|
||||
def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True):
    """
    IoU for foreground class
    binary: 1 foreground, 0 background

    Args:
        preds: predictions, iterated per image when ``per_image`` is true.
        labels: ground truth, paired with ``preds``.
        EMPTY: value used when the union is empty.
        ignore: label value excluded from the union.
        per_image: when false, the whole batch is treated as one image.

    Returns:
        100 times the mean IoU.
    """
    if not per_image:
        preds, labels = (preds,), (labels,)
    scores = []
    for pred, label in zip(preds, labels):
        fg_label = label == 1
        fg_pred = pred == 1
        overlap = (fg_label & fg_pred).sum()
        combined = (fg_label | (fg_pred & (label != ignore))).sum()
        scores.append(float(overlap) / float(combined) if combined else EMPTY)
    # mean across images if per_image
    return 100 * mean(scores)
|
||||
|
||||
|
||||
def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):
    """
    Array of IoU for each (non ignored) class

    Args:
        preds: predictions, iterated per image when ``per_image`` is true.
        labels: ground truth, paired with ``preds``.
        C: number of classes; class ids 0 .. C - 1 are evaluated.
        EMPTY: value used for a class whose union is empty.
        ignore: class id that is skipped and excluded from the unions.
        per_image: when false, the whole batch is treated as one image.

    Returns:
        A numpy array with 100 times the per-class mean IoU.
    """
    if not per_image:
        preds, labels = (preds,), (labels,)
    per_image_ious = []
    for pred, label in zip(preds, labels):
        class_ious = []
        for cls in range(C):
            # The ignored label is sometimes among predicted classes (ENet - CityScapes)
            if cls == ignore:
                continue
            hit = ((label == cls) & (pred == cls)).sum()
            span = ((label == cls) | ((pred == cls) & (label != ignore))).sum()
            class_ious.append(float(hit) / float(span) if span else EMPTY)
        per_image_ious.append(class_ious)
    # mean across images if per_image
    averaged = [mean(per_class) for per_class in zip(*per_image_ious)]
    return 100 * np.array(averaged)
|
||||
|
||||
|
||||
# --------------------------- BINARY LOSSES ---------------------------
|
||||
|
||||
|
||||
def lovasz_hinge(logits, labels, per_image=True, ignore=None):
    """
    Binary Lovasz hinge loss
      logits: [B, H, W] Variable, logits at each pixel (between -inf and +inf)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      per_image: compute the loss per image instead of per batch
      ignore: void class id
    """
    if not per_image:
        return lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
    per_image_losses = (
        lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore))
        for log, lab in zip(logits, labels)
    )
    return mean(per_image_losses)
|
||||
|
||||
|
||||
def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -inf and +inf)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.
    margins = 1. - logits * Variable(signs)
    sorted_margins, order = torch.sort(margins, dim=0, descending=True)
    sorted_gt = labels[order.data]
    return torch.dot(F.relu(sorted_margins), Variable(lovasz_grad(sorted_gt)))
|
||||
|
||||
|
||||
def flatten_binary_scores(scores, labels, ignore=None):
    """
    Flattens predictions in the batch (binary case)
    Remove labels equal to 'ignore'

    Returns:
        ``(scores, labels)`` as 1-D tensors; entries whose label equals
        ``ignore`` are dropped unless ``ignore`` is None.
    """
    flat_scores = scores.view(-1)
    flat_labels = labels.view(-1)
    if ignore is not None:
        keep = flat_labels != ignore
        return flat_scores[keep], flat_labels[keep]
    return flat_scores, flat_labels
|
||||
|
||||
|
||||
class StableBCELoss(torch.nn.modules.Module):
    """Binary cross entropy on logits, averaged over all elements.

    Computed as ``max(x, 0) - x * t + log(1 + exp(-|x|))``.
    """

    def __init__(self):
        super(StableBCELoss, self).__init__()

    def forward(self, input, target):
        # exp() is only ever applied to a non-positive value.
        softplus_term = (1 + (-input.abs()).exp()).log()
        per_element = input.clamp(min=0) - input * target + softplus_term
        return per_element.mean()
|
||||
|
||||
|
||||
def binary_xloss(logits, labels, ignore=None):
    """
    Binary Cross entropy loss
      logits: [B, H, W] Variable, logits at each pixel (between -inf and +inf)
      labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
      ignore: void class id
    """
    flat_logits, flat_labels = flatten_binary_scores(logits, labels, ignore)
    criterion = StableBCELoss()
    return criterion(flat_logits, Variable(flat_labels.float()))
|
||||
|
||||
|
||||
# --------------------------- MULTICLASS LOSSES ---------------------------
|
||||
|
||||
|
||||
def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None):
    """
    Multi-class Lovasz-Softmax loss
      probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
              Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
      labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
      per_image: compute the loss per image instead of per batch
      ignore: void class labels
      weighted: optional per-class weights, forwarded to ``lovasz_softmax_flat``
    """
    if not per_image:
        return lovasz_softmax_flat(*flatten_probas(probas, labels, ignore),
                                   classes=classes, weighted=weighted)
    per_image_losses = (
        lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore),
                            classes=classes, weighted=weighted)
        for prob, lab in zip(probas, labels)
    )
    return mean(per_image_losses)
|
||||
|
||||
|
||||
def lovasz_softmax_flat(probas, labels, classes='present', weighted=None):
    """
    Multi-class Lovasz-Softmax loss
      probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
      labels: [P] Tensor, ground truth labels (between 0 and C - 1)
      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
      weighted: optional indexable of per-class factors applied to each class loss.

    Returns:
        The mean of the per-class losses; a scalar zero when ``probas`` is empty.

    Raises:
        ValueError: if ``probas`` has a single channel but several classes
            are requested.
    """
    if probas.numel() == 0:
        # only void pixels, the gradients should be 0
        # (a scalar, like lovasz_hinge_flat, not an empty tensor)
        return probas.sum() * 0.
    C = probas.size(1)
    class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes
    # Compare by value: ``is`` on a string literal depends on interning.
    only_present = classes == 'present'
    # Count the classes actually summed, not the characters of the string.
    if C == 1 and len(class_to_sum) > 1:
        raise ValueError('Sigmoid output possible only with 1 class')

    losses = []
    for c in class_to_sum:
        fg = (labels == c).float()  # foreground for class c
        if only_present and fg.sum() == 0:
            continue
        class_pred = probas[:, 0] if C == 1 else probas[:, c]
        errors = (Variable(fg) - class_pred).abs()
        errors_sorted, perm = torch.sort(errors, 0, descending=True)
        fg_sorted = fg[perm.data]
        class_loss = torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))
        if weighted is not None:
            class_loss = weighted[c] * class_loss
        losses.append(class_loss)
    return mean(losses)
|
||||
|
||||
|
||||
def flatten_probas(probas, labels, ignore=None):
    """
    Flattens predictions in the batch

    Args:
        probas: [B, C, H, W] tensor, or [B, H, W] which is treated as a
            single-channel (sigmoid) output.
        labels: label tensor with B * H * W elements.
        ignore: label value to drop, or None to keep every pixel.

    Returns:
        ``(probas, labels)`` with shapes [P, C] and [P], restricted to the
        pixels whose label differs from ``ignore``.
    """
    if probas.dim() == 3:
        # assumes output of a sigmoid layer
        B, H, W = probas.size()
        probas = probas.view(B, 1, H, W)
    B, C, H, W = probas.size()
    probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    labels = labels.view(-1)
    if ignore is None:
        return probas, labels
    valid = (labels != ignore)
    # Boolean-mask indexing keeps the [P, C] layout even when a single pixel
    # is valid; nonzero().squeeze() collapsed that case to shape [C].
    vprobas = probas[valid]
    vlabels = labels[valid]
    return vprobas, vlabels
|
||||
|
||||
|
||||
def xloss(logits, labels, ignore=None):
    """
    Cross entropy loss

    Args:
        logits: class scores accepted by ``F.cross_entropy``.
        labels: target class indices.
        ignore: label value to ignore; None keeps the historical value 255.
    """
    # ``ignore`` used to be silently discarded in favour of a hard-coded 255.
    ignore_index = 255 if ignore is None else ignore
    return F.cross_entropy(logits, Variable(labels), ignore_index=ignore_index)
|
||||
|
||||
|
||||
# --------------------------- HELPER FUNCTIONS ---------------------------
|
||||
def isnan(x):
    """Return ``x != x``, which is true for NaN values."""
    differs_from_itself = x != x
    return differs_from_itself
|
||||
|
||||
|
||||
def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.

    Args:
        l: iterable of values supporting ``+`` and ``/``.
        ignore_nan: drop the values for which ``isnan`` is true.
        empty: value returned for an empty iterable, or the string
            ``'raise'`` to raise instead.

    Returns:
        The single value itself when there is exactly one, otherwise the sum
        divided by the number of values.

    Raises:
        ValueError: if the iterable is empty and ``empty == 'raise'``.
    """
    values = iter(l)
    if ignore_nan:
        values = ifilterfalse(isnan, values)
    try:
        acc = next(values)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    n = 1
    for n, v in enumerate(values, 2):
        # Out-of-place add: ``acc += v`` would modify the caller's first
        # element in place when it is a tensor or an array.
        acc = acc + v
    if n == 1:
        return acc
    return acc / n
|
||||
|
||||
# --------------------------- Class ---------------------------
|
||||
class LovaszSoftmax(nn.Module):
    """Module wrapper applying a softmax followed by ``lovasz_softmax``."""

    def __init__(self, per_image=False, ignore_index=255, weighted=None):
        """
        Args:
            per_image: forwarded as ``per_image`` to ``lovasz_softmax``.
            ignore_index: forwarded as ``ignore`` to ``lovasz_softmax``.
            weighted: forwarded as ``weighted`` to ``lovasz_softmax``.
        """
        super(LovaszSoftmax, self).__init__()
        self.lovasz_softmax = lovasz_softmax
        self.per_image = per_image
        self.ignore_index = ignore_index
        self.weighted = weighted

    def forward(self, pred, label):
        """Normalise ``pred`` over dim 1 and return the Lovasz-Softmax loss."""
        probabilities = F.softmax(pred, dim=1)
        return self.lovasz_softmax(
            probabilities,
            label,
            per_image=self.per_image,
            ignore=self.ignore_index,
            weighted=self.weighted,
        )
|
155
vton-api/preprocess/humanparsing/utils/miou.py
Normal file
155
vton-api/preprocess/humanparsing/utils/miou.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import cv2
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
from collections import OrderedDict
|
||||
from PIL import Image as PILImage
|
||||
from utils.transforms import transform_parsing
|
||||
|
||||
LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \
|
||||
'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg',
|
||||
'Right-leg', 'Left-shoe', 'Right-shoe']
|
||||
|
||||
|
||||
# LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs']
|
||||
|
||||
def get_palette(num_cls):
    """ Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map, as a flat list of ``num_cls * 3`` integers
        (three consecutive entries per class).
    """
    palette = [0] * (num_cls * 3)
    for cls_id in range(num_cls):
        base = cls_id * 3
        remaining = cls_id
        bit_pos = 7
        # Consume the class id three bits at a time; bit k of each group goes
        # to channel k, filling the channel from its highest bit downwards.
        while remaining:
            for channel in range(3):
                palette[base + channel] |= ((remaining >> channel) & 1) << bit_pos
            bit_pos -= 1
            remaining >>= 3
    return palette
|
||||
|
||||
|
||||
def get_confusion_matrix(gt_label, pred_label, num_classes):
    """
    Calculate the confusion matrix from the given labels and predictions.

    :param gt_label: the ground truth label
    :param pred_label: the pred label
    :param num_classes: the number of classes
    :return: the confusion matrix, a float array of shape
        (num_classes, num_classes) indexed as [ground truth, prediction]
    """
    n_cells = num_classes * num_classes
    flat_index = (gt_label * num_classes + pred_label).astype('int32')
    # Pad the histogram up to one bin per matrix cell and drop any bin past
    # the last cell, then fold it into the square matrix.
    cell_counts = np.bincount(flat_index, minlength=n_cells)[:n_cells]
    return cell_counts.reshape(num_classes, num_classes).astype(np.float64)
|
||||
|
||||
|
||||
def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'):
    """Evaluate in-memory predictions against the ground-truth PNG files.

    Args:
        preds: predictions, in the order of the ids listed in
            ``<datadir>/<dataset>_id.txt``.
        scales: per-image scale passed to ``transform_parsing``.
        centers: per-image center passed to ``transform_parsing``.
        num_classes: number of classes of the confusion matrix.
        datadir: dataset root directory.
        input_size: forwarded to ``transform_parsing`` (never modified here).
        dataset: split name used to build the id-list and ground-truth paths.

    Returns:
        An OrderedDict mapping each label name to its IoU (in percent),
        followed by 'Pixel accuracy', 'Mean accuracy' and 'Mean IU'.
    """
    val_file = os.path.join(datadir, dataset + '_id.txt')
    # Close the id list deterministically instead of leaking the handle.
    with open(val_file) as id_file:
        val_id = [i_id.strip() for i_id in id_file]

    confusion_matrix = np.zeros((num_classes, num_classes))

    for i, pred_out in enumerate(preds):
        im_name = val_id[i]
        gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png')
        with PILImage.open(gt_path) as gt_image:
            gt = np.array(gt_image)
        h, w = gt.shape
        s = scales[i]
        c = centers[i]
        pred = transform_parsing(pred_out, c, s, w, h, input_size)

        gt = np.asarray(gt, dtype=np.int32)
        pred = np.asarray(pred, dtype=np.int32)

        # True for the pixels that are kept (label different from 255).
        ignore_index = gt != 255

        gt = gt[ignore_index]
        pred = pred[ignore_index]

        confusion_matrix += get_confusion_matrix(gt, pred, num_classes)

    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)

    pixel_accuracy = (tp.sum() / pos.sum()) * 100
    mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    IoU_array = IoU_array * 100
    mean_IoU = IoU_array.mean()
    print('Pixel accuracy: %f \n' % pixel_accuracy)
    print('Mean accuracy: %f \n' % mean_accuracy)
    print('Mean IU: %f \n' % mean_IoU)

    # zip() stops at the shorter of LABELS and the per-class IoU values.
    name_value = list(zip(LABELS, IoU_array))
    name_value.append(('Pixel accuracy', pixel_accuracy))
    name_value.append(('Mean accuracy', mean_accuracy))
    name_value.append(('Mean IU', mean_IoU))
    name_value = OrderedDict(name_value)
    return name_value
|
||||
|
||||
|
||||
def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'):
    """Evaluate prediction PNG files against the ground-truth PNG files.

    Args:
        preds_dir: directory holding one ``<id>.png`` prediction per image.
        num_classes: number of classes of the confusion matrix.
        datadir: dataset root directory.
        dataset: split name used to locate ``<datadir>/<dataset>_id.txt``.

    Returns:
        An OrderedDict mapping each label name to its IoU (in percent),
        followed by 'Pixel accuracy', 'Mean accuracy' and 'Mean IU'.

    Raises:
        IOError: if a ground-truth image cannot be read.
    """
    list_path = os.path.join(datadir, dataset + '_id.txt')
    # Close the id list deterministically instead of leaking the handle.
    with open(list_path) as id_file:
        val_id = [i_id.strip() for i_id in id_file]

    confusion_matrix = np.zeros((num_classes, num_classes))

    for im_name in val_id:
        # NOTE(review): compute_mean_ioU reads '<dataset>_segmentations';
        # here the folder is plain 'segmentations' -- confirm which is intended.
        gt_path = os.path.join(datadir, 'segmentations', im_name + '.png')
        gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
        if gt is None:
            # cv2.imread signals failure by returning None; fail with the
            # path instead of an opaque conversion error further down.
            raise IOError('Cannot read ground-truth image: %s' % gt_path)

        pred_path = os.path.join(preds_dir, im_name + '.png')
        with PILImage.open(pred_path) as pred_image:
            pred = np.asarray(pred_image, dtype=np.int32)

        gt = np.asarray(gt, dtype=np.int32)

        # True for the pixels that are kept (label different from 255).
        ignore_index = gt != 255

        gt = gt[ignore_index]
        pred = pred[ignore_index]

        confusion_matrix += get_confusion_matrix(gt, pred, num_classes)

    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)

    pixel_accuracy = (tp.sum() / pos.sum()) * 100
    mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    IoU_array = IoU_array * 100
    mean_IoU = IoU_array.mean()
    print('Pixel accuracy: %f \n' % pixel_accuracy)
    print('Mean accuracy: %f \n' % mean_accuracy)
    print('Mean IU: %f \n' % mean_IoU)

    # zip() stops at the shorter of LABELS and the per-class IoU values.
    name_value = list(zip(LABELS, IoU_array))
    name_value.append(('Pixel accuracy', pixel_accuracy))
    name_value.append(('Mean accuracy', mean_accuracy))
    name_value.append(('Mean IU', mean_IoU))
    name_value = OrderedDict(name_value)
    return name_value
|
80
vton-api/preprocess/humanparsing/utils/schp.py
Normal file
80
vton-api/preprocess/humanparsing/utils/schp.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : schp.py
|
||||
@Time : 4/8/19 2:11 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import os
|
||||
import torch
|
||||
import modules
|
||||
|
||||
def moving_average(net1, net2, alpha=1):
    """Blend the parameters of ``net2`` into ``net1`` in place.

    Each parameter of ``net1`` becomes ``(1 - alpha) * p1 + alpha * p2``;
    parameters are paired positionally via ``zip``.
    """
    keep = 1.0 - alpha
    for target, source in zip(net1.parameters(), net2.parameters()):
        target.data.mul_(keep)
        target.data.add_(source.data * alpha)
|
||||
|
||||
|
||||
def _check_bn(module, flag):
    """Set ``flag[0]`` to True when ``module`` is an InPlaceABNSync layer."""
    is_abn = issubclass(module.__class__, modules.bn.InPlaceABNSync)
    if is_abn:
        flag[0] = True
|
||||
|
||||
|
||||
def check_bn(model):
    """Return True if any sub-module of ``model`` is flagged by ``_check_bn``."""
    # One-element list acts as a mutable cell shared with the visitor.
    found = [False]

    def _visit(module):
        _check_bn(module, found)

    model.apply(_visit)
    return found[0]
|
||||
|
||||
|
||||
def reset_bn(module):
    """Reset running statistics of an InPlaceABNSync layer (mean 0, var 1)."""
    if not issubclass(module.__class__, modules.bn.InPlaceABNSync):
        return
    module.running_mean = torch.zeros_like(module.running_mean)
    module.running_var = torch.ones_like(module.running_var)
|
||||
|
||||
|
||||
def _get_momenta(module, momenta):
    """Record the momentum of an InPlaceABNSync layer in ``momenta``, keyed by module."""
    if not issubclass(module.__class__, modules.bn.InPlaceABNSync):
        return
    momenta[module] = module.momentum
|
||||
|
||||
|
||||
def _set_momenta(module, momenta):
    """Restore the momentum of an InPlaceABNSync layer from ``momenta``."""
    if not issubclass(module.__class__, modules.bn.InPlaceABNSync):
        return
    module.momentum = momenta[module]
|
||||
|
||||
|
||||
def bn_re_estimate(loader, model):
    """Re-estimate batch-norm running statistics with one pass over ``loader``.

    Does nothing (beyond printing a message) when ``check_bn`` finds no
    matching layer. Otherwise the model is switched to train mode, the running
    statistics are reset, and every batch is forwarded with the layer momentum
    set to ``batch_size / samples_seen_so_far`` so the statistics become a
    running average over the whole loader. The original momenta are restored
    afterwards. Each batch must unpack into three items; only the first
    (the images) is used.
    """
    if not check_bn(model):
        print('No batch norm layer detected')
        return

    model.train()
    saved_momenta = {}
    model.apply(reset_bn)
    model.apply(lambda module: _get_momenta(module, saved_momenta))

    seen = 0
    for batch in loader:
        images, _labels, _ = batch
        batch_size = images.data.size(0)
        running_momentum = batch_size / (seen + batch_size)
        for bn_layer in saved_momenta:
            bn_layer.momentum = running_momentum
        # Forward pass only; the output is discarded, the BN buffers are updated.
        model(images)
        seen += batch_size

    model.apply(lambda module: _set_momenta(module, saved_momenta))
|
||||
|
||||
|
||||
def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'):
    """Save ``states`` to ``output_dir/filename``, replacing any existing file.

    When ``is_best_parsing`` is truthy and ``states`` contains a
    ``'state_dict'`` key, the same object is additionally written to
    ``output_dir/model_parsing_best.pth.tar``.
    """

    def _overwrite(path):
        # Remove a stale file first, then serialize the states.
        if os.path.exists(path):
            os.remove(path)
        torch.save(states, path)

    _overwrite(os.path.join(output_dir, filename))
    if is_best_parsing and 'state_dict' in states:
        _overwrite(os.path.join(output_dir, 'model_parsing_best.pth.tar'))
|
111
vton-api/preprocess/humanparsing/utils/soft_dice_loss.py
Normal file
111
vton-api/preprocess/humanparsing/utils/soft_dice_loss.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : soft_dice_loss.py
|
||||
@Time : 8/13/19 5:09 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
from __future__ import print_function, division
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
|
||||
try:
|
||||
from itertools import ifilterfalse
|
||||
except ImportError: # py3k
|
||||
from itertools import filterfalse as ifilterfalse
|
||||
|
||||
|
||||
def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6):
    '''
    Tversky loss averaged over the classes present in ``labels``.

    probas: [P, C] class probabilities at each prediction (between 0 and 1)
    labels: [P] ground truth labels (between 0 and C - 1)
    alpha: weight of the false-positive term
    beta: weight of the false-negative term
    epsilon: added to the denominator to avoid division by zero

    Equals the soft dice loss for alpha=beta=0.5 and the Jaccard loss for
    alpha=beta=1.0. Classes with no ground-truth pixel are skipped.
    See `Tversky loss function for image segmentation using 3D fully
    convolutional deep networks`, https://arxiv.org/pdf/1706.05721.pdf
    '''
    num_classes = probas.size(1)
    per_class = []
    for cls in range(num_classes):
        target = (labels == cls).float()
        if target.sum() == 0:
            # Class absent from the ground truth: contributes nothing.
            continue
        pred = probas[:, cls]
        true_pos = torch.sum(pred * target)
        false_pos = torch.sum(pred * (1 - target))
        false_neg = torch.sum((1 - pred) * target)
        denominator = true_pos + alpha * false_pos + beta * false_neg
        per_class.append(1 - ((true_pos) / (denominator + epsilon)))
    return mean(per_class)
|
||||
|
||||
|
||||
def flatten_probas(probas, labels, ignore=255):
    """
    Flatten batched predictions and drop ignored pixels.

    probas: [B, C, H, W] tensor of class scores/probabilities
    labels: tensor with B * H * W elements holding the ground-truth labels
    ignore: label value to discard, or None to keep every pixel

    Returns (vprobas, vlabels) with shapes [P, C] and [P], where P is the
    number of kept pixels. The first dimension is preserved even when P is
    0 or 1.
    """
    B, C, H, W = probas.size()
    probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C)  # B * H * W, C = P, C
    labels = labels.view(-1)
    if ignore is None:
        return probas, labels
    valid = (labels != ignore)
    # Boolean-mask indexing always yields [P, C]. The previous
    # ``valid.nonzero().squeeze()`` collapsed to a 0-dim index when exactly
    # one pixel was valid and returned a [C] tensor instead of [1, C].
    vprobas = probas[valid]
    vlabels = labels[valid]
    return vprobas, vlabels
|
||||
|
||||
|
||||
def isnan(x):
    """Return the result of ``x != x`` (truthy only for NaN-like values)."""
    differs_from_itself = x != x
    return differs_from_itself
|
||||
|
||||
|
||||
def mean(l, ignore_nan=False, empty=0):
    """
    Arithmetic mean of an iterable (generators accepted).

    l: iterable of values supporting ``+=`` and division by an int
    ignore_nan: when true, items for which ``isnan`` is truthy are skipped
    empty: value returned for an empty input; the string 'raise' makes an
           empty input raise ValueError instead

    A single-element input is returned as is, without dividing.
    """
    values = iter(l)
    if ignore_nan:
        values = ifilterfalse(isnan, values)
    try:
        total = next(values)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean')
        return empty
    count = 1
    for item in values:
        # In-place accumulation, as in the running-sum idiom.
        total += item
        count += 1
    return total if count == 1 else total / count
|
||||
|
||||
|
||||
class SoftDiceLoss(nn.Module):
    """Soft dice loss: Tversky loss with alpha = beta = 0.5 on softmax outputs."""

    def __init__(self, ignore_index=255):
        super(SoftDiceLoss, self).__init__()
        # Label value excluded from the loss.
        self.ignore_index = ignore_index

    def forward(self, pred, label):
        """Apply softmax over dim 1, flatten, and return the Tversky loss."""
        probabilities = F.softmax(pred, dim=1)
        flat_probas, flat_labels = flatten_probas(probabilities, label, ignore=self.ignore_index)
        return tversky_loss(flat_probas, flat_labels, alpha=0.5, beta=0.5)
|
||||
|
||||
|
||||
class SoftJaccordLoss(nn.Module):
    """Soft Jaccard loss: Tversky loss with alpha = beta = 1.0 on softmax outputs."""

    def __init__(self, ignore_index=255):
        super(SoftJaccordLoss, self).__init__()
        # Label value excluded from the loss.
        self.ignore_index = ignore_index

    def forward(self, pred, label):
        """Apply softmax over dim 1, flatten, and return the Tversky loss."""
        probabilities = F.softmax(pred, dim=1)
        flat_probas, flat_labels = flatten_probas(probabilities, label, ignore=self.ignore_index)
        return tversky_loss(flat_probas, flat_labels, alpha=1.0, beta=1.0)
|
167
vton-api/preprocess/humanparsing/utils/transforms.py
Normal file
167
vton-api/preprocess/humanparsing/utils/transforms.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# ------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft
|
||||
# Licensed under the MIT License.
|
||||
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
import torch
|
||||
|
||||
class BRG2Tensor_transform(object):
    """Convert an ndarray with channels on the last axis into a channel-first tensor."""

    def __call__(self, pic):
        # Move axis 2 to the front: (d0, d1, d2) -> (d2, d0, d1).
        channel_first = pic.transpose((2, 0, 1))
        img = torch.from_numpy(channel_first)
        # Byte (uint8) tensors are promoted to float; other dtypes pass through.
        return img.float() if isinstance(img, torch.ByteTensor) else img
|
||||
|
||||
class BGR2RGB_transform(object):
    """Reverse the order of the first three channels along axis 0."""

    def __call__(self, tensor):
        channel_order = [2, 1, 0]
        return tensor[channel_order, :, :]
|
||||
|
||||
def flip_back(output_flipped, matched_parts):
    '''
    Undo a horizontal flip on a batch of per-joint maps.

    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
    matched_parts: iterable of index pairs whose channels are exchanged

    The last axis is reversed (as a view of the input) and the channels of
    every pair are swapped in place on that view.
    '''
    assert output_flipped.ndim == 4,\
        'output_flipped should be [batch_size, num_joints, height, width]'

    mirrored = output_flipped[:, :, :, ::-1]

    for pair in matched_parts:
        first, second = pair[0], pair[1]
        # Fancy indexing on the right-hand side copies, so the swap is safe.
        mirrored[:, [first, second], :, :] = mirrored[:, [second, first], :, :]

    return mirrored
|
||||
|
||||
|
||||
def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    Mirror joint coordinates horizontally and swap paired joints.

    Column 0 of ``joints`` is replaced by ``width - x - 1``; then, for every
    pair in ``matched_parts``, the two rows are exchanged in both ``joints``
    and ``joints_vis``. Both arrays are modified in place.

    Returns ``(joints * joints_vis, joints_vis)``.
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1

    # Change left-right parts
    for pair in matched_parts:
        left, right = pair[0], pair[1]
        joints[[left, right], :] = joints[[right, left], :]
        joints_vis[[left, right], :] = joints_vis[[right, left], :]

    return joints*joints_vis, joints_vis
|
||||
|
||||
|
||||
def transform_preds(coords, center, scale, input_size):
    """Map the first two columns of each row of ``coords`` through the inverse affine transform.

    The transform is built by ``get_affine_transform(center, scale, 0,
    input_size, inv=1)``. The result has the shape of ``coords``; columns
    beyond the first two stay zero.
    """
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    for idx in range(coords.shape[0]):
        point = coords[idx, 0:2]
        target_coords[idx, 0:2] = affine_transform(point, trans)
    return target_coords
|
||||
|
||||
def transform_parsing(pred, center, scale, width, height, input_size):
    """Warp a label map to a ``width`` x ``height`` canvas with the inverse affine transform.

    Uses nearest-neighbour sampling and fills pixels outside the source
    with 0.
    """
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    # cv2 expects the destination size as (width, height).
    dsize = (int(width), int(height))
    target_pred = cv2.warpAffine(
        pred,
        trans,
        dsize,
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0))
    return target_pred
|
||||
|
||||
def transform_logits(logits, center, scale, width, height, input_size):
    """Warp every channel of ``logits`` (indexed on axis 2) with the inverse affine transform.

    Each channel is resampled bilinearly onto a ``width`` x ``height`` canvas
    with 0 as the border value, and the channels are re-stacked on axis 2.
    """
    trans = get_affine_transform(center, scale, 0, input_size, inv=1)
    channel = logits.shape[2]
    # cv2 expects the destination size as (width, height).
    dsize = (int(width), int(height))
    warped_channels = [
        cv2.warpAffine(
            logits[:, :, idx],
            trans,
            dsize,
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0))
        for idx in range(channel)
    ]
    return np.stack(warped_channels, axis=2)
|
||||
|
||||
|
||||
def get_affine_transform(center,
                         scale,
                         rot,
                         output_size,
                         shift=np.array([0, 0], dtype=np.float32),
                         inv=0):
    """Build a 2x3 affine matrix from three point correspondences.

    center: source-space centre point
    scale: source extent; a scalar is expanded to ``[scale, scale]``
    rot: rotation in degrees
    output_size: destination size, indexed as (height, width)
    shift: offset, multiplied by ``scale`` and added to the source points
    inv: when truthy, return the destination-to-source transform instead

    Returns the matrix produced by ``cv2.getAffineTransform``.
    """
    if not isinstance(scale, (np.ndarray, list)):
        # NOTE(review): looks like leftover debug output; confirm before removing.
        print(scale)
        scale = np.array([scale, scale])

    src_w = scale[0]
    dst_h, dst_w = output_size[0], output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)

    offset = scale * shift
    dst_center = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]

    # Point 0: centre; point 1: centre displaced along the (rotated) direction;
    # point 2: derived from the first two by get_3rd_point.
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + offset
    src[1, :] = center + src_dir + offset
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = dst_center
    dst[1, :] = np.array(dst_center) + dst_dir
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        return cv2.getAffineTransform(np.float32(dst), np.float32(src))
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))
|
||||
|
||||
|
||||
def affine_transform(pt, t):
    """Apply the affine matrix ``t`` to the 2-D point ``pt`` and return the first two components."""
    # Homogeneous coordinates: append 1 so the translation column applies.
    homogeneous = np.array([pt[0], pt[1], 1.]).T
    mapped = np.dot(t, homogeneous)
    return mapped[:2]
|
||||
|
||||
|
||||
def get_3rd_point(a, b):
    """Return ``b`` plus the vector ``a - b`` rotated by 90 degrees: (-dy, dx)."""
    delta = a - b
    perpendicular = np.array([-delta[1], delta[0]], dtype=np.float32)
    return b + perpendicular
|
||||
|
||||
|
||||
def get_dir(src_point, rot_rad):
    """Rotate the 2-D point ``src_point`` by ``rot_rad`` radians; returns a two-element list."""
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    x, y = src_point[0], src_point[1]
    return [x * cs - y * sn, x * sn + y * cs]
|
||||
|
||||
|
||||
def crop(img, center, scale, output_size, rot=0):
    """Warp ``img`` with the forward affine transform for ``center``/``scale``/``rot``.

    ``output_size`` is indexed as (height, width); the result is resampled
    bilinearly to that size.
    """
    trans = get_affine_transform(center, scale, rot, output_size)
    # cv2 expects the destination size as (width, height).
    dsize = (int(output_size[1]), int(output_size[0]))
    return cv2.warpAffine(img, trans, dsize, flags=cv2.INTER_LINEAR)
|
71
vton-api/preprocess/humanparsing/utils/warmup_scheduler.py
Normal file
71
vton-api/preprocess/humanparsing/utils/warmup_scheduler.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""
|
||||
@Author : Peike Li
|
||||
@Contact : peike.li@yahoo.com
|
||||
@File : warmup_scheduler.py
|
||||
@Time : 3/28/19 2:24 PM
|
||||
@Desc :
|
||||
@License : This source code is licensed under the license found in the
|
||||
LICENSE file in the root directory of this source tree.
|
||||
"""
|
||||
|
||||
import math
|
||||
from torch.optim.lr_scheduler import _LRScheduler
|
||||
|
||||
|
||||
class GradualWarmupScheduler(_LRScheduler):
    """ Gradually warm-up learning rate with cosine annealing in optimizer.
    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.

    The learning rate rises linearly from ``eta_min`` to the base rate over
    the first ``warmup_epoch`` epochs, then follows a half cosine from the
    base rate down to ``eta_min`` at ``total_epoch``.
    """

    def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1):
        # Attributes must exist before the base constructor runs, because it
        # performs an initial step that calls get_lr().
        self.total_epoch = total_epoch
        self.eta_min = eta_min
        self.warmup_epoch = warmup_epoch
        super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per base rate for ``self.last_epoch``."""
        # Strict '<' (as in SGDRScheduler): with warmup_epoch == 0 the warm-up
        # branch is never entered, so it cannot divide by zero. At
        # last_epoch == warmup_epoch both formulas evaluate to base_lr.
        if self.last_epoch < self.warmup_epoch:
            return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs]
        else:
            return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs]
|
||||
|
||||
|
||||
class SGDRScheduler(_LRScheduler):
    """ Cosine annealing with warm up and restarts.
    Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`.

    Three phases, selected by ``last_epoch``:
      * before ``warmup_epoch``: linear ramp from ``eta_min`` to the base rate;
      * before ``start_cyclical``: half cosine from the base rate to ``eta_min``;
      * afterwards: half cosine from ``cyclical_base_lr`` to ``eta_min``,
        restarting every ``cyclical_epoch`` epochs.
    """
    def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1):
        # Attributes must exist before the base constructor runs, because it
        # performs an initial step that calls get_lr().
        self.total_epoch = total_epoch
        self.warmup_epoch = warmup_epoch
        self.start_cyclical = start_cyclical
        self.cyclical_base_lr = cyclical_base_lr
        self.cyclical_epoch = cyclical_epoch
        self.eta_min = eta_min
        super(SGDRScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per base rate for ``self.last_epoch``."""
        epoch = self.last_epoch
        floor = self.eta_min

        if epoch < self.warmup_epoch:
            return [floor + epoch*(base_lr - floor)/self.warmup_epoch for base_lr in self.base_lrs]

        if epoch < self.start_cyclical:
            phase = math.pi*(epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch)
            return [floor + (base_lr-floor)*(1+math.cos(phase)) / 2 for base_lr in self.base_lrs]

        # Cyclical phase: the rate ignores the base rates and depends only on
        # the position inside the current restart cycle.
        phase = math.pi * ((epoch-self.start_cyclical) % self.cyclical_epoch)/self.cyclical_epoch
        restart_lr = floor + (self.cyclical_base_lr-floor)*(1+math.cos(phase)) / 2
        return [restart_lr for _ in self.base_lrs]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo: plot the SGDR schedule over 150 epochs for a toy model.
    import matplotlib.pyplot as plt
    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4)
    scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10)

    epochs = list(range(0, 150))
    lr = []
    for epoch in epochs:
        # Jump the scheduler to this epoch, then record the resulting rates.
        scheduler_warmup.step(epoch)
        lr.append(scheduler_warmup.get_lr())

    plt.style.use('ggplot')
    plt.plot(epochs, lr)
    plt.show()
|
||||
|
Reference in New Issue
Block a user