Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions
--- a/vton-api/preprocess/humanparsing/datasets/init.py
+++ b/vton-api/preprocess/humanparsing/datasets/init.py
--- a/vton-api/preprocess/humanparsing/datasets/pycache/init.cpython-311.pyc
+++ b/vton-api/preprocess/humanparsing/datasets/pycache/init.cpython-311.pyc
--- a/vton-api/preprocess/humanparsing/datasets/pycache/simple_extractor_dataset.cpython-311.pyc
+++ b/vton-api/preprocess/humanparsing/datasets/pycache/simple_extractor_dataset.cpython-311.pyc
--- a/vton-api/preprocess/humanparsing/datasets/datasets.py
+++ b/vton-api/preprocess/humanparsing/datasets/datasets.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+"""
+@Author  :   Peike Li
+@Contact :   peike.li@yahoo.com
+@File    :   datasets.py
+@Time    :   8/4/19 3:35 PM
+@Desc    :
+@License :   This source code is licensed under the license found in the
+             LICENSE file in the root directory of this source tree.
+"""
+
+import os
+import numpy as np
+import random
+import torch
+import cv2
+from torch.utils import data
+from utils.transforms import get_affine_transform
+
+
+class LIPDataSet(data.Dataset):
+    """Human-parsing dataset with optional train-time augmentation.
+
+    Sample ids are read from ``<root>/<dataset>_id.txt`` (one id per line).
+    Images are loaded from ``<root>/<dataset>_images/<id>.jpg`` and label maps
+    from ``<root>/<dataset>_segmentations/<id>.png``.
+
+    Args:
+        root: Dataset root directory.
+        dataset: Split name, used as file/directory prefix. ``'train'`` and
+            ``'trainval'`` enable random scale, rotation and horizontal flip.
+            ``'val'`` and ``'test'`` return no label map, and ``'test'`` never
+            reads the annotation file.
+        crop_size: ``[height, width]`` of the warped output.
+        scale_factor: Std-dev of the random scale jitter; the multiplier is
+            clipped to ``[1 - scale_factor, 1 + scale_factor]``.
+        rotation_factor: Std-dev of the random rotation value handed to
+            ``get_affine_transform`` (clipped to twice this value).
+        ignore_label: Stored on the instance; the label warp itself pads with
+            the literal value 255.
+        transform: Optional callable applied to the warped image.
+    """
+
+    def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25,
+                 rotation_factor=30, ignore_label=255, transform=None):
+        self.root = root
+        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
+        self.crop_size = np.asarray(crop_size)
+        self.ignore_label = ignore_label
+        self.scale_factor = scale_factor
+        self.rotation_factor = rotation_factor
+        self.flip_prob = 0.5
+        self.transform = transform
+        self.dataset = dataset
+
+        list_path = os.path.join(self.root, self.dataset + '_id.txt')
+        # Read inside a context manager so the file handle is always closed.
+        with open(list_path) as list_file:
+            self.train_list = [i_id.strip() for i_id in list_file]
+        self.number_samples = len(self.train_list)
+
+    def __len__(self):
+        """Return the number of ids listed in the split file."""
+        return self.number_samples
+
+    def _box2cs(self, box):
+        """Convert an ``(x, y, w, h)`` box into a ``(center, scale)`` pair."""
+        x, y, w, h = box[:4]
+        return self._xywh2cs(x, y, w, h)
+
+    def _xywh2cs(self, x, y, w, h):
+        """Return the box center and its size padded to the crop aspect ratio.
+
+        The shorter side is enlarged so that ``w / h == self.aspect_ratio``;
+        the box is never shrunk.
+        """
+        center = np.zeros((2), dtype=np.float32)
+        center[0] = x + w * 0.5
+        center[1] = y + h * 0.5
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
+        return center, scale
+
+    def __getitem__(self, index):
+        """Load, augment and warp one sample.
+
+        Returns:
+            ``(image, meta)`` for the ``'val'`` and ``'test'`` splits, otherwise
+            ``(image, label_parsing, meta)`` where ``label_parsing`` is a torch
+            tensor built from the warped label map.
+
+        Raises:
+            FileNotFoundError: If the image (or a required label map) cannot be
+                read by OpenCV.
+        """
+        train_item = self.train_list[index]
+        returns_label = self.dataset not in ('val', 'test')
+
+        im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
+        parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')
+
+        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
+        if im is None:
+            # cv2.imread signals failure with None instead of raising.
+            raise FileNotFoundError('Cannot read image: {}'.format(im_path))
+        h, w, _ = im.shape
+        # np.long was removed in NumPy 1.24; use an explicit 64-bit integer.
+        parsing_anno = np.zeros((h, w), dtype=np.int64)
+
+        # Get person center and scale (the whole image is treated as the box)
+        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
+        r = 0
+
+        if self.dataset != 'test':
+            # Get parsing annotation
+            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
+            if parsing_anno is None and returns_label:
+                raise FileNotFoundError(
+                    'Cannot read annotation: {}'.format(parsing_anno_path))
+            if self.dataset == 'train' or self.dataset == 'trainval':
+                sf = self.scale_factor
+                rf = self.rotation_factor
+                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0
+
+                if random.random() <= self.flip_prob:
+                    im = im[:, ::-1, :]
+                    parsing_anno = parsing_anno[:, ::-1]
+                    person_center[0] = im.shape[1] - person_center[0] - 1
+                    # After mirroring, paired label ids must trade places.
+                    right_idx = [15, 17, 19]
+                    left_idx = [14, 16, 18]
+                    for right_id, left_id in zip(right_idx, left_idx):
+                        # Locate both classes before writing so the swap does
+                        # not overwrite pixels it still has to move.
+                        right_pos = np.where(parsing_anno == right_id)
+                        left_pos = np.where(parsing_anno == left_id)
+                        parsing_anno[right_pos[0], right_pos[1]] = left_id
+                        parsing_anno[left_pos[0], left_pos[1]] = right_id
+
+        trans = get_affine_transform(person_center, s, r, self.crop_size)
+        # OpenCV expects (width, height); crop_size is stored as (height, width).
+        out_size = (int(self.crop_size[1]), int(self.crop_size[0]))
+        image = cv2.warpAffine(
+            im,
+            trans,
+            out_size,
+            flags=cv2.INTER_LINEAR,
+            borderMode=cv2.BORDER_CONSTANT,
+            borderValue=(0, 0, 0))
+
+        if self.transform:
+            image = self.transform(image)
+
+        meta = {
+            'name': train_item,
+            'center': person_center,
+            'height': h,
+            'width': w,
+            'scale': s,
+            'rotation': r
+        }
+
+        if not returns_label:
+            return image, meta
+
+        # Nearest-neighbour keeps label ids intact; padding is filled with 255.
+        label_parsing = cv2.warpAffine(
+            parsing_anno,
+            trans,
+            out_size,
+            flags=cv2.INTER_NEAREST,
+            borderMode=cv2.BORDER_CONSTANT,
+            borderValue=(255))
+
+        label_parsing = torch.from_numpy(label_parsing)
+
+        return image, label_parsing, meta
+
+
+class LIPDataValSet(data.Dataset):
+    """Validation dataset that yields warped images without label maps.
+
+    Sample ids are read from ``<root>/<dataset>_id.txt`` and images from
+    ``<root>/<dataset>_images/<id>.jpg``.
+
+    Args:
+        root: Dataset root directory.
+        dataset: Split name used as file/directory prefix.
+        crop_size: ``[height, width]`` of the warped output.
+        transform: Optional callable applied to the warped image. It must
+            return a tensor when ``flip`` is enabled.
+        flip: When true, each item is a stack of the image and a copy flipped
+            along its last dimension.
+    """
+
+    def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False):
+        self.root = root
+        self.transform = transform
+        self.flip = flip
+        self.dataset = dataset
+        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
+        self.crop_size = np.asarray(crop_size)
+
+        list_path = os.path.join(self.root, self.dataset + '_id.txt')
+        # Read inside a context manager so the file handle is always closed.
+        with open(list_path) as list_file:
+            self.val_list = [i_id.strip() for i_id in list_file]
+        self.number_samples = len(self.val_list)
+
+    def __len__(self):
+        """Return the number of ids listed in the split file."""
+        return len(self.val_list)
+
+    def _box2cs(self, box):
+        """Convert an ``(x, y, w, h)`` box into a ``(center, scale)`` pair."""
+        x, y, w, h = box[:4]
+        return self._xywh2cs(x, y, w, h)
+
+    def _xywh2cs(self, x, y, w, h):
+        """Return the box center and its size padded to the crop aspect ratio."""
+        center = np.zeros((2), dtype=np.float32)
+        center[0] = x + w * 0.5
+        center[1] = y + h * 0.5
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
+
+        return center, scale
+
+    def __getitem__(self, index):
+        """Load and warp one validation image.
+
+        Returns:
+            ``(batch_input_im, meta)``. ``batch_input_im`` is the transformed
+            image, or a stack ``[image, flipped_image]`` when ``flip`` is set.
+
+        Raises:
+            FileNotFoundError: If the image cannot be read by OpenCV.
+        """
+        val_item = self.val_list[index]
+        # Load validation image
+        im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
+        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
+        if im is None:
+            # cv2.imread signals failure with None instead of raising.
+            raise FileNotFoundError('Cannot read image: {}'.format(im_path))
+        h, w, _ = im.shape
+        # Get person center and scale (the whole image is treated as the box)
+        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
+        r = 0
+        trans = get_affine_transform(person_center, s, r, self.crop_size)
+        # OpenCV expects (width, height); crop_size is stored as (height, width).
+        image = cv2.warpAffine(
+            im,
+            trans,
+            (int(self.crop_size[1]), int(self.crop_size[0])),
+            flags=cv2.INTER_LINEAR,
+            borderMode=cv2.BORDER_CONSTANT,
+            borderValue=(0, 0, 0))
+        # transform defaults to None, so only call it when one was supplied.
+        if self.transform is not None:
+            image = self.transform(image)
+        if self.flip:
+            # Build the mirrored copy only when it is actually requested.
+            batch_input_im = torch.stack([image, image.flip(dims=[-1])])
+        else:
+            batch_input_im = image
+
+        meta = {
+            'name': val_item,
+            'center': person_center,
+            'height': h,
+            'width': w,
+            'scale': s,
+            'rotation': r
+        }
+
+        return batch_input_im, meta
--- a/vton-api/preprocess/humanparsing/datasets/simple_extractor_dataset.py
+++ b/vton-api/preprocess/humanparsing/datasets/simple_extractor_dataset.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+"""
+@Author  :   Peike Li
+@Contact :   peike.li@yahoo.com
+@File    :   simple_extractor_dataset.py
+@Time    :   8/30/19 9:12 PM
+@Desc    :   Dataset Definition
+@License :   This source code is licensed under the license found in the
+             LICENSE file in the root directory of this source tree.
+"""
+
+import os
+import pdb
+
+import cv2
+import numpy as np
+from PIL import Image
+from torch.utils import data
+from utils.transforms import get_affine_transform
+
+
+class SimpleFolderDataset(data.Dataset):
+    """Inference dataset over a folder, a single image file, or a PIL image.
+
+    Args:
+        root: One of: a ``PIL.Image.Image`` (used directly as the only item),
+            a path to a single image file, or a directory whose entries are
+            all treated as images.
+        input_size: ``[height, width]`` of the warped output.
+        transform: Optional callable applied to the warped image.
+    """
+
+    def __init__(self, root, input_size=[512, 512], transform=None):
+        self.root = root
+        self.transform = transform
+        self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
+        self.input_size = np.asarray(input_size)
+        self.is_pil_image = False
+        if isinstance(root, Image.Image):
+            self.file_list = [root]
+            self.is_pil_image = True
+        elif os.path.isfile(root):
+            self.file_list = [os.path.basename(root)]
+            self.root = os.path.dirname(root)
+        else:
+            # os.listdir returns entries in arbitrary order; sort so that the
+            # index -> file mapping is reproducible across runs and machines.
+            self.file_list = sorted(os.listdir(self.root))
+
+    def __len__(self):
+        """Return the number of items (files, or 1 for a PIL image)."""
+        return len(self.file_list)
+
+    def _box2cs(self, box):
+        """Convert an ``(x, y, w, h)`` box into a ``(center, scale)`` pair."""
+        x, y, w, h = box[:4]
+        return self._xywh2cs(x, y, w, h)
+
+    def _xywh2cs(self, x, y, w, h):
+        """Return the box center and its size padded to the input aspect ratio."""
+        center = np.zeros((2), dtype=np.float32)
+        center[0] = x + w * 0.5
+        center[1] = y + h * 0.5
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array([w, h], dtype=np.float32)
+        return center, scale
+
+    def __getitem__(self, index):
+        """Load and warp one image.
+
+        Returns:
+            ``(image, meta)`` where ``meta`` holds the center, original height
+            and width, scale and rotation used for the warp.
+
+        Raises:
+            FileNotFoundError: If a file entry cannot be decoded by OpenCV.
+        """
+        if self.is_pil_image:
+            # Force three channels first so grayscale/palette images do not
+            # break the channel indexing, then reorder RGB -> BGR to match
+            # what cv2.imread produces on the file path.
+            rgb = self.file_list[index].convert('RGB')
+            img = np.asarray(rgb)[:, :, [2, 1, 0]]
+        else:
+            img_name = self.file_list[index]
+            img_path = os.path.join(self.root, img_name)
+            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
+            if img is None:
+                # cv2.imread signals failure with None instead of raising.
+                raise FileNotFoundError('Cannot read image: {}'.format(img_path))
+        h, w, _ = img.shape
+
+        # Get person center and scale (the whole image is treated as the box)
+        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
+        r = 0
+        trans = get_affine_transform(person_center, s, r, self.input_size)
+        # OpenCV expects (width, height); input_size is stored as (height, width).
+        image = cv2.warpAffine(
+            img,
+            trans,
+            (int(self.input_size[1]), int(self.input_size[0])),
+            flags=cv2.INTER_LINEAR,
+            borderMode=cv2.BORDER_CONSTANT,
+            borderValue=(0, 0, 0))
+
+        # transform defaults to None, so only call it when one was supplied.
+        if self.transform is not None:
+            image = self.transform(image)
+        meta = {
+            'center': person_center,
+            'height': h,
+            'width': w,
+            'scale': s,
+            'rotation': r
+        }
+
+        return image, meta
--- a/vton-api/preprocess/humanparsing/datasets/target_generation.py
+++ b/vton-api/preprocess/humanparsing/datasets/target_generation.py
@@ -0,0 +1,40 @@
+import torch
+from torch.nn import functional as F
+
+
+def generate_edge_tensor(label, edge_width=3):
+    """Build a binary edge map from a parsing label map.
+
+    A pixel is marked as edge when its label differs from a vertical,
+    horizontal or diagonal neighbour and neither of the two pixels carries
+    the value 255. The marks are then thickened with an
+    ``edge_width x edge_width`` box filter.
+
+    Args:
+        label: Tensor of shape ``(h, w)`` or ``(n, h, w)`` holding class ids.
+        edge_width: Side length of the square dilation kernel. Odd values
+            keep the spatial size unchanged.
+
+    Returns:
+        Float tensor of zeros and ones with all size-1 dimensions squeezed
+        out (so a single-sample batch comes back as ``(h, w)``). It lives on
+        the label's CUDA device, else on the current CUDA device when one is
+        available, else on the label's own device.
+    """
+    # Prefer CUDA as before, but fall back to the label's device so the
+    # function also works on hosts without a GPU.
+    if label.is_cuda:
+        device = label.device
+    elif torch.cuda.is_available():
+        device = torch.device('cuda')
+    else:
+        device = label.device
+    label = label.to(device=device, dtype=torch.float)
+    if label.dim() == 2:
+        label = label.unsqueeze(0)
+    _, h, w = label.shape
+    edge = torch.zeros(label.shape, dtype=torch.float, device=device)
+
+    # Each edge_* below is a view into `edge`, so masked writes update it.
+
+    # vertical neighbour: row y against row y - 1
+    edge_right = edge[:, 1:h, :]
+    edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255)
+               & (label[:, :h - 1, :] != 255)] = 1
+
+    # horizontal neighbour: column x against column x + 1
+    edge_up = edge[:, :, :w - 1]
+    edge_up[(label[:, :, :w - 1] != label[:, :, 1:w])
+            & (label[:, :, :w - 1] != 255)
+            & (label[:, :, 1:w] != 255)] = 1
+
+    # diagonal neighbour: (y, x) against (y + 1, x + 1)
+    edge_upright = edge[:, :h - 1, :w - 1]
+    edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w])
+                 & (label[:, :h - 1, :w - 1] != 255)
+                 & (label[:, 1:h, 1:w] != 255)] = 1
+
+    # anti-diagonal neighbour: (y, x) against (y + 1, x - 1)
+    edge_bottomright = edge[:, :h - 1, 1:w]
+    edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1])
+                     & (label[:, :h - 1, 1:w] != 255)
+                     & (label[:, 1:h, :w - 1] != 255)] = 1
+
+    kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float, device=device)
+    with torch.no_grad():
+        edge = edge.unsqueeze(1)
+        # Padding follows the kernel size (1 for the default width of 3) so
+        # the output keeps the input's spatial size for any odd edge_width.
+        edge = F.conv2d(edge, kernel, stride=1, padding=edge_width // 2)
+    # Any positive filter response means an edge pixel is within reach.
+    edge[edge != 0] = 1
+    edge = edge.squeeze()
+    return edge