Add to new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : consistency_loss.py
@Time : 7/23/19 4:02 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch
import torch.nn.functional as F
from torch import nn
from datasets.target_generation import generate_edge_tensor
class ConsistencyLoss(nn.Module):
def __init__(self, ignore_index=255):
super(ConsistencyLoss, self).__init__()
self.ignore_index=ignore_index
def forward(self, parsing, edge, label):
parsing_pre = torch.argmax(parsing, dim=1)
parsing_pre[label==self.ignore_index]=self.ignore_index
generated_edge = generate_edge_tensor(parsing_pre)
edge_pre = torch.argmax(edge, dim=1)
        v_generate_edge = generated_edge[label != self.ignore_index]
        v_edge_pre = edge_pre[label != self.ignore_index]
        v_edge_pre = v_edge_pre.float()
positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count
return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0))
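
A minimal usage sketch, assuming a CUDA device and the repo's datasets.target_generation module on the path; the shapes below are illustrative only:

import torch

# illustrative: batch 2, 20 parsing classes, 2 edge classes, 64x64 crops
parsing_logits = torch.randn(2, 20, 64, 64).cuda()
edge_logits = torch.randn(2, 2, 64, 64).cuda()
label = torch.randint(0, 20, (2, 64, 64)).cuda()

criterion = ConsistencyLoss(ignore_index=255)
loss = criterion(parsing_logits, edge_logits, label)  # scalar edge-consistency term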

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : criterion.py
@Time : 8/30/19 8:59 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch.nn as nn
import torch
import numpy as np
from torch.nn import functional as F
from .lovasz_softmax import LovaszSoftmax
from .kl_loss import KLDivergenceLoss
from .consistency_loss import ConsistencyLoss
NUM_CLASSES = 20
class CriterionAll(nn.Module):
def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1,
num_classes=20):
super(CriterionAll, self).__init__()
self.ignore_index = ignore_index
self.use_class_weight = use_class_weight
self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)
self.lovasz = LovaszSoftmax(ignore_index=ignore_index)
self.kldiv = KLDivergenceLoss(ignore_index=ignore_index)
self.reg = ConsistencyLoss(ignore_index=ignore_index)
self.lamda_1 = lambda_1
self.lamda_2 = lambda_2
self.lamda_3 = lambda_3
self.num_classes = num_classes
def parsing_loss(self, preds, target, cycle_n=None):
"""
Loss function definition.
Args:
preds: [[parsing result1, parsing result2],[edge result]]
target: [parsing label, egde label]
soft_preds: [[parsing result1, parsing result2],[edge result]]
Returns:
Calculated Loss.
"""
h, w = target[0].size(1), target[0].size(2)
pos_num = torch.sum(target[1] == 1, dtype=torch.float)
neg_num = torch.sum(target[1] == 0, dtype=torch.float)
weight_pos = neg_num / (pos_num + neg_num)
weight_neg = pos_num / (pos_num + neg_num)
weights = torch.tensor([weight_neg, weight_pos]) # edge loss weight
loss = 0
# loss for segmentation
preds_parsing = preds[0]
for pred_parsing in preds_parsing:
scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
mode='bilinear', align_corners=True)
loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0])
if target[2] is None:
loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0])
else:
soft_scale_pred = F.interpolate(input=target[2], size=(h, w),
mode='bilinear', align_corners=True)
soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes),
1.0 / (cycle_n + 1.0))
loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0])
# loss for edge
preds_edge = preds[1]
for pred_edge in preds_edge:
scale_pred = F.interpolate(input=pred_edge, size=(h, w),
mode='bilinear', align_corners=True)
if target[3] is None:
loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1],
weights.cuda(), ignore_index=self.ignore_index)
else:
soft_scale_edge = F.interpolate(input=target[3], size=(h, w),
mode='bilinear', align_corners=True)
soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2),
1.0 / (cycle_n + 1.0))
loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0])
# consistency regularization
preds_parsing = preds[0]
preds_edge = preds[1]
for pred_parsing in preds_parsing:
scale_pred = F.interpolate(input=pred_parsing, size=(h, w),
mode='bilinear', align_corners=True)
scale_edge = F.interpolate(input=preds_edge[0], size=(h, w),
mode='bilinear', align_corners=True)
loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0])
return loss
def forward(self, preds, target, cycle_n=None):
loss = self.parsing_loss(preds, target, cycle_n)
return loss
def _generate_weights(self, masks, num_classes):
"""
masks: torch.Tensor with shape [B, H, W]
"""
masks_label = masks.data.cpu().numpy().astype(np.int64)
pixel_nums = []
tot_pixels = 0
for i in range(num_classes):
            pixel_num_of_cls_i = np.sum(masks_label == i).astype(float)
pixel_nums.append(pixel_num_of_cls_i)
tot_pixels += pixel_num_of_cls_i
weights = []
for i in range(num_classes):
weights.append(
(tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1)
)
        weights = np.array(weights, dtype=float)
# weights = torch.from_numpy(weights).float().to(masks.device)
return weights
def moving_average(target1, target2, alpha=1.0):
target = 0
target += (1.0 - alpha) * target1
target += target2 * alpha
return target
def to_one_hot(tensor, num_cls, dim=1, ignore_index=255):
b, h, w = tensor.shape
tensor[tensor == ignore_index] = 0
onehot_tensor = torch.zeros(b, num_cls, h, w).cuda()
onehot_tensor.scatter_(dim, tensor.unsqueeze(dim), 1)
return onehot_tensor
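
A hedged sketch of driving CriterionAll during the first training cycle, before self-correction produces soft labels (the shapes and lambda weights are illustrative; CUDA and the repo's generate_edge_tensor are assumed for the edge and consistency terms):

import torch

criterion = CriterionAll(lambda_1=1, lambda_2=1, lambda_3=0.1, num_classes=20)

# predictions at reduced resolution, labels at full crop resolution
preds = [[torch.randn(2, 20, 30, 30, device='cuda', requires_grad=True)],  # parsing branch
         [torch.randn(2, 2, 30, 30, device='cuda', requires_grad=True)]]   # edge branch
parsing_label = torch.randint(0, 20, (2, 120, 120), device='cuda')
edge_label = torch.randint(0, 2, (2, 120, 120), device='cuda')

# target[2] and target[3] (soft labels) stay None until cycle_n >= 1
loss = criterion(preds, [parsing_label, edge_label, None, None], cycle_n=None)
loss.backward()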

View File

@@ -0,0 +1,188 @@
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Encoding Data Parallel"""
import threading
import functools
import torch
from torch.autograd import Variable, Function
import torch.cuda.comm as comm
from torch.nn.parallel.data_parallel import DataParallel
from torch.nn.parallel.parallel_apply import get_a_var
from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
torch_ver = torch.__version__[:3]
__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback']
def allreduce(*inputs):
"""Cross GPU all reduce autograd operation for calculate mean and
variance in SyncBN.
"""
return AllReduce.apply(*inputs)
class AllReduce(Function):
@staticmethod
def forward(ctx, num_inputs, *inputs):
ctx.num_inputs = num_inputs
ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)]
inputs = [inputs[i:i + num_inputs]
for i in range(0, len(inputs), num_inputs)]
# sort before reduce sum
inputs = sorted(inputs, key=lambda i: i[0].get_device())
results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
return tuple([t for tensors in outputs for t in tensors])
@staticmethod
def backward(ctx, *inputs):
inputs = [i.data for i in inputs]
inputs = [inputs[i:i + ctx.num_inputs]
for i in range(0, len(inputs), ctx.num_inputs)]
results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors])
class Reduce(Function):
@staticmethod
def forward(ctx, *inputs):
ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))]
inputs = sorted(inputs, key=lambda i: i.get_device())
return comm.reduce_add(inputs)
@staticmethod
def backward(ctx, gradOutput):
return Broadcast.apply(ctx.target_gpus, gradOutput)
class DataParallelModel(DataParallel):
"""Implements data parallelism at the module level.
This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the
batch dimension.
In the forward pass, the module is replicated on each device,
    and each replica handles a portion of the input. During the backward pass,
    gradients from each replica are summed into the original module.
    Note that the outputs are not gathered; use the compatible
:class:`encoding.parallel.DataParallelCriterion`.
The batch size should be larger than the number of GPUs used. It should
also be an integer multiple of the number of GPUs so that each chunk is
the same size (so that each GPU processes the same number of samples).
Args:
module: module to be parallelized
device_ids: CUDA devices (default: all devices)
Reference:
Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
    Amit Agrawal. "Context Encoding for Semantic Segmentation."
*The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
Example::
>>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
>>> y = net(x)
"""
def gather(self, outputs, output_device):
return outputs
def replicate(self, module, device_ids):
modules = super(DataParallelModel, self).replicate(module, device_ids)
return modules
class DataParallelCriterion(DataParallel):
"""
    Calculate loss across multiple GPUs, balancing the memory usage for
    semantic segmentation.
    The targets are split across the specified devices by chunking in
the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`.
Reference:
Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
    Amit Agrawal. "Context Encoding for Semantic Segmentation."
*The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
Example::
>>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
>>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2])
>>> y = net(x)
>>> loss = criterion(y, target)
"""
def forward(self, inputs, *targets, **kwargs):
        # inputs are already scattered across devices;
        # only the targets need scattering here
if not self.device_ids:
return self.module(inputs, *targets, **kwargs)
targets, kwargs = self.scatter(targets, kwargs, self.device_ids)
if len(self.device_ids) == 1:
return self.module(inputs, *targets[0], **kwargs[0])
replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs)
return Reduce.apply(*outputs) / len(outputs)
def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None):
assert len(modules) == len(inputs)
assert len(targets) == len(inputs)
if kwargs_tup:
assert len(modules) == len(kwargs_tup)
else:
kwargs_tup = ({},) * len(modules)
if devices is not None:
assert len(modules) == len(devices)
else:
devices = [None] * len(modules)
lock = threading.Lock()
results = {}
if torch_ver != "0.3":
grad_enabled = torch.is_grad_enabled()
def _worker(i, module, input, target, kwargs, device=None):
if torch_ver != "0.3":
torch.set_grad_enabled(grad_enabled)
if device is None:
device = get_a_var(input).get_device()
try:
if not isinstance(input, tuple):
input = (input,)
with torch.cuda.device(device):
output = module(*(input + target), **kwargs)
with lock:
results[i] = output
except Exception as e:
with lock:
results[i] = e
if len(modules) > 1:
threads = [threading.Thread(target=_worker,
args=(i, module, input, target,
kwargs, device),)
for i, (module, input, target, kwargs, device) in
enumerate(zip(modules, inputs, targets, kwargs_tup, devices))]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
else:
        _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])
outputs = []
for i in range(len(inputs)):
output = results[i]
if isinstance(output, Exception):
raise output
outputs.append(output)
return outputs
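
Completing the docstring examples, a hedged end-to-end sketch pairing the two containers (placeholder network and loss; two visible GPUs assumed):

import torch
import torch.nn as nn

model = nn.Conv2d(3, 20, 3, padding=1)  # placeholder network
parallel_model = DataParallelModel(model, device_ids=[0, 1]).cuda()
parallel_loss = DataParallelCriterion(nn.CrossEntropyLoss(), device_ids=[0, 1])

x = torch.randn(8, 3, 64, 64).cuda()
y = torch.randint(0, 20, (8, 64, 64)).cuda()
outputs = parallel_model(x)       # per-GPU outputs, deliberately not gathered
loss = parallel_loss(outputs, y)  # targets are scattered, losses reduced across GPUs
loss.backward()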

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : kl_loss.py
@Time : 7/23/19 4:02 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import torch
import torch.nn.functional as F
from torch import nn
def flatten_probas(input, target, labels, ignore=255):
"""
Flattens predictions in the batch.
"""
B, C, H, W = input.size()
input = input.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C
target = target.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C
labels = labels.view(-1)
if ignore is None:
return input, target
valid = (labels != ignore)
vinput = input[valid.nonzero().squeeze()]
vtarget = target[valid.nonzero().squeeze()]
return vinput, vtarget
class KLDivergenceLoss(nn.Module):
def __init__(self, ignore_index=255, T=1):
super(KLDivergenceLoss, self).__init__()
self.ignore_index=ignore_index
self.T = T
def forward(self, input, target, label):
log_input_prob = F.log_softmax(input / self.T, dim=1)
        target_prob = F.softmax(target / self.T, dim=1)
        loss = F.kl_div(*flatten_probas(log_input_prob, target_prob, label, ignore=self.ignore_index))
        return self.T * self.T * loss  # scale by T^2 to keep gradients balanced across temperatures
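
A brief CPU-friendly sketch of the intended use: distilling soft logits from a previous cycle into the current prediction while masking ignored pixels (shapes illustrative):

import torch

kl = KLDivergenceLoss(ignore_index=255, T=1)

student_logits = torch.randn(2, 20, 32, 32)  # current prediction
teacher_logits = torch.randn(2, 20, 32, 32)  # soft labels from the previous cycle
label = torch.randint(0, 20, (2, 32, 32))
label[:, :4, :] = 255                        # mark a few rows as ignored

loss = kl(student_logits, teacher_logits, label)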

View File

@@ -0,0 +1,279 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : lovasz_softmax.py
@Time : 8/30/19 7:12 PM
@Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch
Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License)
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
from __future__ import print_function, division
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
from torch import nn
try:
from itertools import ifilterfalse
except ImportError: # py3k
from itertools import filterfalse as ifilterfalse
def lovasz_grad(gt_sorted):
"""
Computes gradient of the Lovasz extension w.r.t sorted errors
See Alg. 1 in paper
"""
p = len(gt_sorted)
gts = gt_sorted.sum()
intersection = gts - gt_sorted.float().cumsum(0)
union = gts + (1 - gt_sorted).float().cumsum(0)
jaccard = 1. - intersection / union
if p > 1: # cover 1-pixel case
jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
return jaccard
def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True):
"""
IoU for foreground class
binary: 1 foreground, 0 background
"""
if not per_image:
preds, labels = (preds,), (labels,)
ious = []
for pred, label in zip(preds, labels):
intersection = ((label == 1) & (pred == 1)).sum()
union = ((label == 1) | ((pred == 1) & (label != ignore))).sum()
if not union:
iou = EMPTY
else:
iou = float(intersection) / float(union)
ious.append(iou)
        iou = mean(ious)  # mean across images if per_image
return 100 * iou
def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):
"""
Array of IoU for each (non ignored) class
"""
if not per_image:
preds, labels = (preds,), (labels,)
ious = []
for pred, label in zip(preds, labels):
iou = []
for i in range(C):
if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes)
intersection = ((label == i) & (pred == i)).sum()
union = ((label == i) | ((pred == i) & (label != ignore))).sum()
if not union:
iou.append(EMPTY)
else:
iou.append(float(intersection) / float(union))
ious.append(iou)
    ious = [mean(iou) for iou in zip(*ious)]  # mean across images if per_image
return 100 * np.array(ious)
# --------------------------- BINARY LOSSES ---------------------------
def lovasz_hinge(logits, labels, per_image=True, ignore=None):
"""
Binary Lovasz hinge loss
logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
per_image: compute the loss per image instead of per batch
ignore: void class id
"""
if per_image:
loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore))
for log, lab in zip(logits, labels))
else:
loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
return loss
def lovasz_hinge_flat(logits, labels):
"""
Binary Lovasz hinge loss
logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
labels: [P] Tensor, binary ground truth labels (0 or 1)
ignore: label to ignore
"""
if len(labels) == 0:
# only void pixels, the gradients should be 0
return logits.sum() * 0.
signs = 2. * labels.float() - 1.
errors = (1. - logits * Variable(signs))
errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
perm = perm.data
gt_sorted = labels[perm]
grad = lovasz_grad(gt_sorted)
loss = torch.dot(F.relu(errors_sorted), Variable(grad))
return loss
def flatten_binary_scores(scores, labels, ignore=None):
"""
Flattens predictions in the batch (binary case)
Remove labels equal to 'ignore'
"""
scores = scores.view(-1)
labels = labels.view(-1)
if ignore is None:
return scores, labels
valid = (labels != ignore)
vscores = scores[valid]
vlabels = labels[valid]
return vscores, vlabels
class StableBCELoss(torch.nn.modules.Module):
def __init__(self):
super(StableBCELoss, self).__init__()
def forward(self, input, target):
neg_abs = - input.abs()
loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
return loss.mean()
def binary_xloss(logits, labels, ignore=None):
"""
Binary Cross entropy loss
logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty)
labels: [B, H, W] Tensor, binary ground truth masks (0 or 1)
ignore: void class id
"""
logits, labels = flatten_binary_scores(logits, labels, ignore)
loss = StableBCELoss()(logits, Variable(labels.float()))
return loss
# --------------------------- MULTICLASS LOSSES ---------------------------
def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None):
"""
Multi-class Lovasz-Softmax loss
probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1).
Interpreted as binary (sigmoid) output with outputs of size [B, H, W].
labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1)
classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
per_image: compute the loss per image instead of per batch
ignore: void class labels
"""
if per_image:
loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted)
for prob, lab in zip(probas, labels))
else:
loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted )
return loss
def lovasz_softmax_flat(probas, labels, classes='present', weighted=None):
"""
Multi-class Lovasz-Softmax loss
probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
labels: [P] Tensor, ground truth labels (between 0 and C - 1)
classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
"""
if probas.numel() == 0:
# only void pixels, the gradients should be 0
return probas * 0.
C = probas.size(1)
losses = []
class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes
for c in class_to_sum:
fg = (labels == c).float() # foreground for class c
        if classes == 'present' and fg.sum() == 0:
continue
if C == 1:
if len(classes) > 1:
raise ValueError('Sigmoid output possible only with 1 class')
class_pred = probas[:, 0]
else:
class_pred = probas[:, c]
errors = (Variable(fg) - class_pred).abs()
errors_sorted, perm = torch.sort(errors, 0, descending=True)
perm = perm.data
fg_sorted = fg[perm]
if weighted is not None:
losses.append(weighted[c]*torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
else:
losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted))))
return mean(losses)
def flatten_probas(probas, labels, ignore=None):
"""
Flattens predictions in the batch
"""
if probas.dim() == 3:
# assumes output of a sigmoid layer
B, H, W = probas.size()
probas = probas.view(B, 1, H, W)
B, C, H, W = probas.size()
probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C
labels = labels.view(-1)
if ignore is None:
return probas, labels
valid = (labels != ignore)
vprobas = probas[valid.nonzero().squeeze()]
vlabels = labels[valid]
return vprobas, vlabels
def xloss(logits, labels, ignore=None):
"""
Cross entropy loss
"""
    return F.cross_entropy(logits, Variable(labels), ignore_index=255 if ignore is None else ignore)
# --------------------------- HELPER FUNCTIONS ---------------------------
def isnan(x):
return x != x
def mean(l, ignore_nan=False, empty=0):
"""
nanmean compatible with generators.
"""
l = iter(l)
if ignore_nan:
l = ifilterfalse(isnan, l)
try:
n = 1
acc = next(l)
except StopIteration:
if empty == 'raise':
raise ValueError('Empty mean')
return empty
for n, v in enumerate(l, 2):
acc += v
if n == 1:
return acc
return acc / n
# --------------------------- Class ---------------------------
class LovaszSoftmax(nn.Module):
def __init__(self, per_image=False, ignore_index=255, weighted=None):
super(LovaszSoftmax, self).__init__()
self.lovasz_softmax = lovasz_softmax
self.per_image = per_image
self.ignore_index=ignore_index
self.weighted = weighted
def forward(self, pred, label):
pred = F.softmax(pred, dim=1)
return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted)
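
A minimal sketch of the LovaszSoftmax wrapper: it applies softmax to raw logits internally and filters void pixels before computing the loss (shapes illustrative):

import torch

criterion = LovaszSoftmax(per_image=False, ignore_index=255)
logits = torch.randn(2, 20, 32, 32, requires_grad=True)
labels = torch.randint(0, 20, (2, 32, 32))
labels[0, :2, :] = 255            # void pixels are dropped by flatten_probas

loss = criterion(logits, labels)  # scalar, differentiable
loss.backward()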

View File

@@ -0,0 +1,155 @@
import cv2
import os
import numpy as np
from collections import OrderedDict
from PIL import Image as PILImage
from utils.transforms import transform_parsing
LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \
'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg',
'Right-leg', 'Left-shoe', 'Right-shoe']
# LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs']
def get_palette(num_cls):
""" Returns the color map for visualizing the segmentation mask.
Args:
num_cls: Number of classes
Returns:
The color map
"""
n = num_cls
palette = [0] * (n * 3)
for j in range(0, n):
lab = j
palette[j * 3 + 0] = 0
palette[j * 3 + 1] = 0
palette[j * 3 + 2] = 0
i = 0
while lab:
palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
i += 1
lab >>= 3
return palette
def get_confusion_matrix(gt_label, pred_label, num_classes):
"""
    Calculate the confusion matrix given the ground-truth and predicted labels
    :param gt_label: the ground truth label
    :param pred_label: the predicted label
    :param num_classes: the number of classes
:return: the confusion matrix
"""
index = (gt_label * num_classes + pred_label).astype('int32')
label_count = np.bincount(index)
confusion_matrix = np.zeros((num_classes, num_classes))
for i_label in range(num_classes):
for i_pred_label in range(num_classes):
cur_index = i_label * num_classes + i_pred_label
if cur_index < len(label_count):
confusion_matrix[i_label, i_pred_label] = label_count[cur_index]
return confusion_matrix
def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'):
val_file = os.path.join(datadir, dataset + '_id.txt')
val_id = [i_id.strip() for i_id in open(val_file)]
confusion_matrix = np.zeros((num_classes, num_classes))
for i, pred_out in enumerate(preds):
im_name = val_id[i]
gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png')
gt = np.array(PILImage.open(gt_path))
h, w = gt.shape
s = scales[i]
c = centers[i]
pred = transform_parsing(pred_out, c, s, w, h, input_size)
gt = np.asarray(gt, dtype=np.int32)
pred = np.asarray(pred, dtype=np.int32)
ignore_index = gt != 255
gt = gt[ignore_index]
pred = pred[ignore_index]
confusion_matrix += get_confusion_matrix(gt, pred, num_classes)
pos = confusion_matrix.sum(1)
res = confusion_matrix.sum(0)
tp = np.diag(confusion_matrix)
pixel_accuracy = (tp.sum() / pos.sum()) * 100
mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
IoU_array = (tp / np.maximum(1.0, pos + res - tp))
IoU_array = IoU_array * 100
mean_IoU = IoU_array.mean()
print('Pixel accuracy: %f \n' % pixel_accuracy)
print('Mean accuracy: %f \n' % mean_accuracy)
print('Mean IU: %f \n' % mean_IoU)
name_value = []
for i, (label, iou) in enumerate(zip(LABELS, IoU_array)):
name_value.append((label, iou))
name_value.append(('Pixel accuracy', pixel_accuracy))
name_value.append(('Mean accuracy', mean_accuracy))
name_value.append(('Mean IU', mean_IoU))
name_value = OrderedDict(name_value)
return name_value
def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'):
list_path = os.path.join(datadir, dataset + '_id.txt')
val_id = [i_id.strip() for i_id in open(list_path)]
confusion_matrix = np.zeros((num_classes, num_classes))
for i, im_name in enumerate(val_id):
gt_path = os.path.join(datadir, 'segmentations', im_name + '.png')
gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
pred_path = os.path.join(preds_dir, im_name + '.png')
pred = np.asarray(PILImage.open(pred_path))
gt = np.asarray(gt, dtype=np.int32)
pred = np.asarray(pred, dtype=np.int32)
ignore_index = gt != 255
gt = gt[ignore_index]
pred = pred[ignore_index]
confusion_matrix += get_confusion_matrix(gt, pred, num_classes)
pos = confusion_matrix.sum(1)
res = confusion_matrix.sum(0)
tp = np.diag(confusion_matrix)
pixel_accuracy = (tp.sum() / pos.sum()) * 100
mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100
IoU_array = (tp / np.maximum(1.0, pos + res - tp))
IoU_array = IoU_array * 100
mean_IoU = IoU_array.mean()
print('Pixel accuracy: %f \n' % pixel_accuracy)
print('Mean accuracy: %f \n' % mean_accuracy)
print('Mean IU: %f \n' % mean_IoU)
name_value = []
for i, (label, iou) in enumerate(zip(LABELS, IoU_array)):
name_value.append((label, iou))
name_value.append(('Pixel accuracy', pixel_accuracy))
name_value.append(('Mean accuracy', mean_accuracy))
name_value.append(('Mean IU', mean_IoU))
name_value = OrderedDict(name_value)
return name_value
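
For reference, a hedged sketch of how get_palette is typically paired with a predicted mask to write a color-indexed PNG (the array and output path are placeholders):

import numpy as np
from PIL import Image as PILImage

pred = np.random.randint(0, 20, (473, 473), dtype=np.uint8)  # placeholder parsing result
output_img = PILImage.fromarray(pred)
output_img.putpalette(get_palette(20))
output_img.save('example_parsing_result.png')                # hypothetical output path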

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : schp.py
@Time : 4/8/19 2:11 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import os
import torch
import modules
def moving_average(net1, net2, alpha=1):
for param1, param2 in zip(net1.parameters(), net2.parameters()):
param1.data *= (1.0 - alpha)
param1.data += param2.data * alpha
def _check_bn(module, flag):
if issubclass(module.__class__, modules.bn.InPlaceABNSync):
flag[0] = True
def check_bn(model):
flag = [False]
model.apply(lambda module: _check_bn(module, flag))
return flag[0]
def reset_bn(module):
if issubclass(module.__class__, modules.bn.InPlaceABNSync):
module.running_mean = torch.zeros_like(module.running_mean)
module.running_var = torch.ones_like(module.running_var)
def _get_momenta(module, momenta):
if issubclass(module.__class__, modules.bn.InPlaceABNSync):
momenta[module] = module.momentum
def _set_momenta(module, momenta):
if issubclass(module.__class__, modules.bn.InPlaceABNSync):
module.momentum = momenta[module]
def bn_re_estimate(loader, model):
if not check_bn(model):
print('No batch norm layer detected')
return
model.train()
momenta = {}
model.apply(reset_bn)
model.apply(lambda module: _get_momenta(module, momenta))
n = 0
for i_iter, batch in enumerate(loader):
images, labels, _ = batch
b = images.data.size(0)
momentum = b / (n + b)
for module in momenta.keys():
module.momentum = momentum
model(images)
n += b
model.apply(lambda module: _set_momenta(module, momenta))
def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'):
save_path = os.path.join(output_dir, filename)
if os.path.exists(save_path):
os.remove(save_path)
torch.save(states, save_path)
if is_best_parsing and 'state_dict' in states:
best_save_path = os.path.join(output_dir, 'model_parsing_best.pth.tar')
if os.path.exists(best_save_path):
os.remove(best_save_path)
torch.save(states, best_save_path)
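
A hedged sketch of where these helpers sit in the SCHP self-correction cycle: average the online model into the running SCHP model, re-estimate BN statistics, then checkpoint (the Linear models stand in for real parsing networks; cycle_n is illustrative):

import torch

model = torch.nn.Linear(8, 4)       # placeholder for the online model
schp_model = torch.nn.Linear(8, 4)  # placeholder for the self-corrected average

cycle_n = 1                         # number of completed extra cycles
moving_average(schp_model, model, alpha=1.0 / (cycle_n + 1))
# with a real loader and InPlaceABNSync layers, BN statistics would be refreshed:
# bn_re_estimate(train_loader, schp_model)
save_schp_checkpoint({'state_dict': schp_model.state_dict(), 'cycle_n': cycle_n},
                     is_best_parsing=False, output_dir='.')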

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : soft_dice_loss.py
@Time : 8/13/19 5:09 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
from __future__ import print_function, division
import torch
import torch.nn.functional as F
from torch import nn
try:
from itertools import ifilterfalse
except ImportError: # py3k
from itertools import filterfalse as ifilterfalse
def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6):
'''
Tversky loss function.
probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1)
labels: [P] Tensor, ground truth labels (between 0 and C - 1)
Same as soft dice loss when alpha=beta=0.5.
    Same as Jaccard loss when alpha=beta=1.0.
See `Tversky loss function for image segmentation using 3D fully convolutional deep networks`
https://arxiv.org/pdf/1706.05721.pdf
'''
C = probas.size(1)
losses = []
for c in list(range(C)):
fg = (labels == c).float()
if fg.sum() == 0:
continue
class_pred = probas[:, c]
p0 = class_pred
p1 = 1 - class_pred
g0 = fg
g1 = 1 - fg
numerator = torch.sum(p0 * g0)
denominator = numerator + alpha * torch.sum(p0 * g1) + beta * torch.sum(p1 * g0)
losses.append(1 - ((numerator) / (denominator + epsilon)))
return mean(losses)
def flatten_probas(probas, labels, ignore=255):
"""
Flattens predictions in the batch
"""
B, C, H, W = probas.size()
probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C
labels = labels.view(-1)
if ignore is None:
return probas, labels
valid = (labels != ignore)
vprobas = probas[valid.nonzero().squeeze()]
vlabels = labels[valid]
return vprobas, vlabels
def isnan(x):
return x != x
def mean(l, ignore_nan=False, empty=0):
"""
nanmean compatible with generators.
"""
l = iter(l)
if ignore_nan:
l = ifilterfalse(isnan, l)
try:
n = 1
acc = next(l)
except StopIteration:
if empty == 'raise':
raise ValueError('Empty mean')
return empty
for n, v in enumerate(l, 2):
acc += v
if n == 1:
return acc
return acc / n
class SoftDiceLoss(nn.Module):
def __init__(self, ignore_index=255):
super(SoftDiceLoss, self).__init__()
self.ignore_index = ignore_index
def forward(self, pred, label):
pred = F.softmax(pred, dim=1)
return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=0.5, beta=0.5)
class SoftJaccordLoss(nn.Module):
def __init__(self, ignore_index=255):
super(SoftJaccordLoss, self).__init__()
self.ignore_index = ignore_index
def forward(self, pred, label):
pred = F.softmax(pred, dim=1)
return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=1.0, beta=1.0)
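
A small sketch of the Tversky-based wrappers (SoftDiceLoss uses alpha=beta=0.5, SoftJaccordLoss alpha=beta=1.0), on illustrative shapes:

import torch

dice = SoftDiceLoss(ignore_index=255)
logits = torch.randn(2, 20, 32, 32, requires_grad=True)
labels = torch.randint(0, 20, (2, 32, 32))

loss = dice(logits, labels)  # softmax applied internally, void labels filtered out
loss.backward()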

View File

@@ -0,0 +1,167 @@
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
import torch
class BRG2Tensor_transform(object):
def __call__(self, pic):
img = torch.from_numpy(pic.transpose((2, 0, 1)))
if isinstance(img, torch.ByteTensor):
return img.float()
else:
return img
class BGR2RGB_transform(object):
def __call__(self, tensor):
return tensor[[2,1,0],:,:]
def flip_back(output_flipped, matched_parts):
'''
    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
'''
assert output_flipped.ndim == 4,\
'output_flipped should be [batch_size, num_joints, height, width]'
output_flipped = output_flipped[:, :, :, ::-1]
for pair in matched_parts:
tmp = output_flipped[:, pair[0], :, :].copy()
output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
output_flipped[:, pair[1], :, :] = tmp
return output_flipped
def fliplr_joints(joints, joints_vis, width, matched_parts):
"""
flip coords
"""
# Flip horizontal
joints[:, 0] = width - joints[:, 0] - 1
# Change left-right parts
for pair in matched_parts:
joints[pair[0], :], joints[pair[1], :] = \
joints[pair[1], :], joints[pair[0], :].copy()
joints_vis[pair[0], :], joints_vis[pair[1], :] = \
joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
return joints*joints_vis, joints_vis
def transform_preds(coords, center, scale, input_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, 0, input_size, inv=1)
for p in range(coords.shape[0]):
target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
return target_coords
def transform_parsing(pred, center, scale, width, height, input_size):
trans = get_affine_transform(center, scale, 0, input_size, inv=1)
target_pred = cv2.warpAffine(
pred,
trans,
        (int(width), int(height)),
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0))
return target_pred
def transform_logits(logits, center, scale, width, height, input_size):
trans = get_affine_transform(center, scale, 0, input_size, inv=1)
channel = logits.shape[2]
target_logits = []
for i in range(channel):
target_logit = cv2.warpAffine(
logits[:,:,i],
trans,
            (int(width), int(height)),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0))
target_logits.append(target_logit)
target_logits = np.stack(target_logits,axis=2)
return target_logits
def get_affine_transform(center,
scale,
rot,
output_size,
shift=np.array([0, 0], dtype=np.float32),
inv=0):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale])
scale_tmp = scale
src_w = scale_tmp[0]
dst_w = output_size[1]
dst_h = output_size[0]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]
dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def crop(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)
dst_img = cv2.warpAffine(img,
trans,
(int(output_size[1]), int(output_size[0])),
flags=cv2.INTER_LINEAR)
return dst_img
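
A short sketch of the crop/restore round trip built on get_affine_transform, with made-up center, scale, and image sizes:

import numpy as np

img = np.zeros((300, 200, 3), dtype=np.uint8)        # placeholder BGR image (H=300, W=200)
center = np.array([100.0, 150.0], dtype=np.float32)  # person center (x, y)
scale = np.array([200.0, 300.0], dtype=np.float32)   # person box size, illustrative
input_size = (473, 473)                              # network input (height, width)

patch = crop(img, center, scale, input_size)  # 473x473 network input
pred = np.zeros(input_size, dtype=np.uint8)   # placeholder parsing output at input size
restored = transform_parsing(pred, center, scale, 200, 300, input_size)  # back to 200x300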

View File

@@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@Author : Peike Li
@Contact : peike.li@yahoo.com
@File : warmup_scheduler.py
@Time : 3/28/19 2:24 PM
@Desc :
@License : This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import math
from torch.optim.lr_scheduler import _LRScheduler
class GradualWarmupScheduler(_LRScheduler):
""" Gradually warm-up learning rate with cosine annealing in optimizer.
Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
"""
def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1):
self.total_epoch = total_epoch
self.eta_min = eta_min
self.warmup_epoch = warmup_epoch
super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch)
def get_lr(self):
if self.last_epoch <= self.warmup_epoch:
return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs]
else:
return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs]
class SGDRScheduler(_LRScheduler):
""" Consine annealing with warm up and restarts.
Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`.
"""
def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1):
self.total_epoch = total_epoch
self.start_cyclical = start_cyclical
self.cyclical_epoch = cyclical_epoch
self.cyclical_base_lr = cyclical_base_lr
self.eta_min = eta_min
self.warmup_epoch = warmup_epoch
super(SGDRScheduler, self).__init__(optimizer, last_epoch)
def get_lr(self):
if self.last_epoch < self.warmup_epoch:
return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs]
elif self.last_epoch < self.start_cyclical:
return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch))) / 2 for base_lr in self.base_lrs]
else:
return [self.eta_min + (self.cyclical_base_lr-self.eta_min)*(1+math.cos(math.pi* ((self.last_epoch-self.start_cyclical)% self.cyclical_epoch)/self.cyclical_epoch)) / 2 for base_lr in self.base_lrs]
if __name__ == '__main__':
import matplotlib.pyplot as plt
import torch
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4)
scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10)
lr = []
for epoch in range(0,150):
scheduler_warmup.step(epoch)
lr.append(scheduler_warmup.get_lr())
plt.style.use('ggplot')
plt.plot(list(range(0,150)), lr)
plt.show()
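
The __main__ demo above exercises SGDRScheduler; an analogous sketch for GradualWarmupScheduler (same toy model, linear warm-up followed by a single cosine decay, no restarts):

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=7e-3, momentum=0.9)
scheduler = GradualWarmupScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10)
for epoch in range(150):
    scheduler.step(epoch)  # warm up for 10 epochs, then cosine-decay toward eta_min
    # ... one epoch of training would go here ...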