Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .data.datasets import builtin # just to register data
from .config import add_densepose_config, add_dataset_category_config
from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
from .evaluator import DensePoseCOCOEvaluator
from .roi_head import DensePoseROIHeads
from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
from .utils.transform import load_from_cfg

View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN):
"""
Add config for additional category-related dataset options
- category whitelisting
- category mapping
"""
_C = cfg
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
def add_densepose_config(cfg: CN):
"""
Add config for densepose head.
"""
_C = cfg
_C.MODEL.DENSEPOSE_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
# Number of parts used for point labels
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
# Loss weights for annotation masks (14 parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
# Loss weights for surface parts (24 parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
# Loss weights for UV regression.
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
# For Decoder
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
# For DeepLab head
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
# Confidences
# Enable learning confidences (variances) along with the actual values
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
# UV confidence lower bound
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
# Statistical model type for confidence learning, possible values:
# - "iid_iso": statistically independent identically distributed residuals
# with isotropic covariance
# - "indep_aniso": statistically independent residuals with anisotropic
# covariances
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
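
A minimal sketch of how these helpers are typically wired into a detectron2 config, assuming the package is importable as `densepose` (as the `__init__.py` above suggests); the YAML path is hypothetical:

from detectron2.config import get_cfg
from densepose import add_densepose_config, add_dataset_category_config

cfg = get_cfg()                    # base detectron2 config
add_densepose_config(cfg)          # adds the MODEL.ROI_DENSEPOSE_HEAD.* options defined above
add_dataset_category_config(cfg)   # adds DATASETS.CATEGORY_MAPS / DATASETS.WHITELISTED_CATEGORIES
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")  # hypothetical config file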

View File

@@ -0,0 +1,9 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .build import build_detection_test_loader, build_detection_train_loader
from .dataset_mapper import DatasetMapper
# ensure the builtin data are registered
from . import datasets
__all__ = [k for k in globals().keys() if not k.startswith("_")]

View File

@@ -0,0 +1,405 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import itertools
import logging
import numpy as np
import operator
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
import torch
from detectron2.config import CfgNode
from detectron2.data import samplers
from detectron2.data.build import (
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
worker_init_reset_seed,
)
from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
from detectron2.utils.comm import get_world_size
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode):
num_workers = get_world_size()
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
assert (
images_per_batch % num_workers == 0
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
images_per_batch, num_workers
)
assert (
images_per_batch >= num_workers
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
images_per_batch, num_workers
)
images_per_worker = images_per_batch // num_workers
return images_per_worker
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
meta = MetadataCatalog.get(dataset_name)
for dataset_dict in dataset_dicts:
for ann in dataset_dict["annotations"]:
ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
# merge categories for all data
merged_categories = {}
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
for cat_id, cat_name in meta.categories.items():
if cat_id not in merged_categories:
merged_categories[cat_id] = (cat_name, dataset_name)
continue
cat_name_other, dataset_name_other = merged_categories[cat_id]
if cat_name_other != cat_name:
raise ValueError(
f"Incompatible categories for category ID {cat_id}: "
f'dataset {dataset_name} value "{cat_name}", '
f'dataset {dataset_name_other} value "{cat_name_other}"'
)
merged_cat_id_to_cont_id = {}
for i, cat_id in enumerate(sorted(merged_categories.keys())):
merged_cat_id_to_cont_id[cat_id] = i
# add category maps to metadata
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
categories = meta.get("categories")
meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
meta.thing_dataset_id_to_contiguous_id = {
cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
}
meta.thing_contiguous_id_to_dataset_id = {
merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
}
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
def has_annotations(instance: Instance) -> bool:
return "annotations" in instance
def has_only_crowd_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if ann.get("is_crowd", 0) == 0:
return False
return True
def general_keep_instance_predicate(instance: Instance) -> bool:
return has_annotations(instance) and not has_only_crowd_annotations(instance)
if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
return None
return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
def has_sufficient_num_keypoints(instance: Instance) -> bool:
num_kpts = sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in instance["annotations"]
if "keypoints" in ann
)
return num_kpts >= min_num_keypoints
if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
return has_sufficient_num_keypoints
return None
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.MASK_ON:
return None
def has_mask_annotations(instance: Instance) -> bool:
return any("segmentation" in ann for ann in instance["annotations"])
return has_mask_annotations
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.DENSEPOSE_ON:
return None
def has_densepose_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
(DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
):
return True
return False
return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
specific_predicate_creators = [
_maybe_create_keypoints_keep_instance_predicate,
_maybe_create_mask_keep_instance_predicate,
_maybe_create_densepose_keep_instance_predicate,
]
predicates = [creator(cfg) for creator in specific_predicate_creators]
predicates = [p for p in predicates if p is not None]
if not predicates:
return None
def combined_predicate(instance: Instance) -> bool:
return any(p(instance) for p in predicates)
return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
def combined_general_specific_keep_predicate(instance: Instance) -> bool:
return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
return None
if general_keep_predicate is None:
return combined_specific_keep_predicate
if combined_specific_keep_predicate is None:
return general_keep_predicate
return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
return general_keep_predicate
def _maybe_filter_and_map_categories(
dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
meta = MetadataCatalog.get(dataset_name)
whitelisted_categories = meta.get("whitelisted_categories")
category_map = meta.get("category_map", {})
if whitelisted_categories is None and not category_map:
return dataset_dicts
filtered_dataset_dicts = []
for dataset_dict in dataset_dicts:
anns = []
for ann in dataset_dict["annotations"]:
cat_id = ann["category_id"]
if whitelisted_categories is not None and cat_id not in whitelisted_categories:
continue
ann["category_id"] = category_map.get(cat_id, cat_id)
anns.append(ann)
dataset_dict["annotations"] = anns
filtered_dataset_dicts.append(dataset_dict)
return filtered_dataset_dicts
def _add_category_whitelists_to_metadata(cfg: CfgNode):
for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
meta = MetadataCatalog.get(dataset_name)
meta.whitelisted_categories = whitelisted_cat_ids
logger = logging.getLogger(__name__)
logger.info(
"Whitelisted categories for dataset {}: {}".format(
dataset_name, meta.whitelisted_categories
)
)
def _add_category_maps_to_metadata(cfg: CfgNode):
for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
category_map = {
int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
}
meta = MetadataCatalog.get(dataset_name)
meta.category_map = category_map
logger = logging.getLogger(__name__)
logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
def combine_detection_dataset_dicts(
dataset_names: Collection[str],
keep_instance_predicate: Optional[InstancePredicate] = None,
proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
"""
Load and prepare dataset dicts for training / testing
Args:
dataset_names (Collection[str]): a list of dataset names
keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
applied to instance dicts which defines whether to keep the instance
proposal_files (Collection[str]): if given, a list of object proposal files
that match each dataset in `dataset_names`.
"""
assert len(dataset_names)
if proposal_files is None:
proposal_files = [None] * len(dataset_names)
assert len(dataset_names) == len(proposal_files)
# load annotations and dataset metadata
dataset_map = {}
for dataset_name in dataset_names:
dataset_dicts = DatasetCatalog.get(dataset_name)
dataset_map[dataset_name] = dataset_dicts
# initialize category maps
_add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
# apply category maps
all_datasets_dicts = []
for dataset_name, proposal_file in zip(dataset_names, proposal_files):
dataset_dicts = dataset_map[dataset_name]
assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
if proposal_file is not None:
dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
_map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
print_instances_class_histogram(
dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
)
all_datasets_dicts.append(dataset_dicts)
if keep_instance_predicate is not None:
all_datasets_dicts_plain = [
d
for d in itertools.chain.from_iterable(all_datasets_dicts)
if keep_instance_predicate(d)
]
else:
all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
return all_datasets_dicts_plain
def build_detection_train_loader(cfg: CfgNode, mapper=None):
"""
A data loader is created in a way similar to that of Detectron2.
The main differences are:
- it allows combining datasets with different but compatible object category sets
The data loader is created by the following steps:
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
2. Start workers to work on the dicts. Each worker will:
* Map each metadata dict into another format to be consumed by the model.
* Batch them by simply putting dicts into a list.
The batched ``list[mapped_dict]`` is what this dataloader will return.
Args:
cfg (CfgNode): the config
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, True)`.
Returns:
an infinite iterator of training data
"""
images_per_worker = _compute_num_images_per_worker(cfg)
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
if mapper is None:
mapper = DatasetMapper(cfg, True)
dataset = MapDataset(dataset, mapper)
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = samplers.TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
sampler = samplers.RepeatFactorTrainingSampler(
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
) # yield individual mapped dict
data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
else:
batch_sampler = torch.utils.data.sampler.BatchSampler(
sampler, images_per_worker, drop_last=True
)
# drop_last so the batch always have the same size
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
worker_init_fn=worker_init_reset_seed,
)
return data_loader
def build_detection_test_loader(cfg, dataset_name, mapper=None):
"""
Similar to `build_detection_train_loader`.
But this function uses the given `dataset_name` argument (instead of the names in cfg),
and uses batch size 1.
Args:
cfg: a detectron2 CfgNode
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
mapper (callable): a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, False)`.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
"""
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
[dataset_name],
keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
proposal_files=[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
]
if cfg.MODEL.LOAD_PROPOSALS
else None,
)
dataset = DatasetFromList(dataset_dicts)
if mapper is None:
mapper = DatasetMapper(cfg, False)
dataset = MapDataset(dataset, mapper)
sampler = samplers.InferenceSampler(len(dataset))
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
)
return data_loader
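
A hedged sketch of using the loaders defined above, assuming `cfg` has been prepared as in the earlier config example and that `cfg.DATASETS.TRAIN` lists registered DensePose datasets:

from densepose.data import build_detection_train_loader

train_loader = build_detection_train_loader(cfg)  # infinite iterator over training batches
batch = next(iter(train_loader))                  # list[dict]; each dict holds "image" and "instances"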

View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import torch
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
class DatasetMapper:
"""
A customized version of `detectron2.data.DatasetMapper`
"""
def __init__(self, cfg, is_train=True):
self.tfm_gens = utils.build_transform_gen(cfg, is_train)
# fmt: off
self.img_format = cfg.INPUT.FORMAT
self.mask_on = cfg.MODEL.MASK_ON
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
# fmt: on
if self.keypoint_on and is_train:
# Flip only makes sense in training
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
else:
self.keypoint_hflip_indices = None
if self.densepose_on:
densepose_transform_srcs = [
MetadataCatalog.get(ds).densepose_transform_src
for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
]
assert len(densepose_transform_srcs) > 0
# TODO: check that DensePose transformation data is the same for
# all the data. Otherwise one would have to pass DB ID with
# each entry to select proper transformation data. For now, since
# all DensePose annotated data uses the same data semantics, we
# omit this check.
densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
self.densepose_transform_data = DensePoseTransformData.load(
densepose_transform_data_fpath
)
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
image_shape = image.shape[:2] # h, w
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
if not self.is_train:
dataset_dict.pop("annotations", None)
return dataset_dict
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
if not self.keypoint_on:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
# USER: Don't call transpose_densepose if you don't need
annos = [
self._transform_densepose(
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
),
transforms,
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(annos, image_shape)
if len(annos) and "densepose" in annos[0]:
gt_densepose = [obj["densepose"] for obj in annos]
instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)
dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
return dataset_dict
def _transform_densepose(self, annotation, transforms):
if not self.densepose_on:
return annotation
# Handle densepose annotations
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
if is_valid:
densepose_data = DensePoseDataRelative(annotation, cleanup=True)
densepose_data.apply_transform(transforms, self.densepose_transform_data)
annotation["densepose"] = densepose_data
else:
# logger = logging.getLogger(__name__)
# logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
DensePoseDataRelative.cleanup_annotation(annotation)
# NOTE: annotations for certain instances may be unavailable.
# 'None' is accepted by the DensePoseList data structure.
annotation["densepose"] = None
return annotation
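
A hedged sketch of applying the mapper to one dataset dict, assuming `dataset_dicts` was obtained from `DatasetCatalog.get(...)` for a registered DensePose dataset and `cfg` is set up as above:

from densepose.data import DatasetMapper

mapper = DatasetMapper(cfg, is_train=True)
mapped = mapper(dataset_dicts[0])
# mapped["image"] is a float32 CHW tensor; mapped["instances"] carries gt_boxes and,
# when DensePose annotations are present, a gt_densepose DensePoseList.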

View File

@@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import builtin # ensure the builtin data are registered
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]

View File

@@ -0,0 +1,10 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
DEFAULT_DATASETS_ROOT = "data"
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)

View File

@@ -0,0 +1,314 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
name: str
images_root: str
annotations_fpath: str
DATASETS = [
CocoDatasetInfo(
name="densepose_coco_2014_train",
images_root="coco/train2014",
annotations_fpath="coco/annotations/densepose_train2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival_100",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014_100.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_valminusminival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
),
CocoDatasetInfo(
name="densepose_chimps",
images_root="densepose_evolution/densepose_chimps",
annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
),
]
BASE_DATASETS = [
CocoDatasetInfo(
name="base_coco_2017_train",
images_root="coco/train2017",
annotations_fpath="coco/annotations/instances_train2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val_100",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017_100.json",
),
]
def _is_relative_local_path(path: os.PathLike):
path_str = os.fsdecode(path)
return ("://" not in path_str) and not os.path.isabs(path)
def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
"""
Prepends the provided path with a base path prefix if:
1) base path is not None;
2) path is a local path
"""
if base_path is None:
return path
if _is_relative_local_path(path):
return os.path.join(base_path, path)
return path
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
"""
Returns metadata associated with COCO DensePose data
Args:
base_path: Optional[os.PathLike]
Base path used to load metadata from
Returns:
Dict[str, Any]
Metadata in the form of a dictionary
"""
meta = {
"densepose_transform_src": _maybe_prepend_base_path(
base_path, "UV_symmetry_transforms.mat"
),
"densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
"densepose_smpl_subdiv_transform": _maybe_prepend_base_path(
base_path, "SMPL_SUBDIV_TRANSFORM.mat"
),
}
return meta
def _load_coco_annotations(json_file: str):
"""
Load COCO annotations from a JSON file
Args:
json_file: str
Path to the file to load annotations from
Returns:
Instance of `pycocotools.coco.COCO` that provides access to annotations
data
"""
from pycocotools.coco import COCO
logger = logging.getLogger(__name__)
timer = Timer()
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
return coco_api
def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
meta = MetadataCatalog.get(dataset_name)
meta.categories = {c["id"]: c["name"] for c in categories}
logger = logging.getLogger(__name__)
logger.info("Dataset {} categories: {}".format(dataset_name, categories))
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
for key in DENSEPOSE_KEYS:
if key in ann_dict:
obj[key] = ann_dict[key]
def _combine_images_with_annotations(
dataset_name: str,
image_root: str,
img_datas: Iterable[Dict[str, Any]],
ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
ann_keys = ["iscrowd", "category_id"]
dataset_dicts = []
for img_dict, ann_dicts in zip(img_datas, ann_datas):
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
record["image_id"] = img_dict["id"]
record["dataset"] = dataset_name
objs = []
for ann_dict in ann_dicts:
assert ann_dict["image_id"] == record["image_id"]
assert ann_dict.get("ignore", 0) == 0
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
_maybe_add_bbox(obj, ann_dict)
_maybe_add_segm(obj, ann_dict)
_maybe_add_keypoints(obj, ann_dict)
_maybe_add_densepose(obj, ann_dict)
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
"""
Loads a JSON file with annotations in COCO instances format.
Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
in a more flexible way. Postpones category mapping to a later stage to be
able to combine several datasets with different (but coherent) sets of
categories.
Args:
annotations_json_file: str
Path to the JSON file with annotations in COCO instances format.
image_root: str
directory that contains all the images
dataset_name: str
the name that identifies a dataset, e.g. "densepose_coco_2014_train"
"""
coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
_add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
logger = logging.getLogger(__name__)
logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images.
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
_verify_annotations_have_unique_ids(annotations_json_file, anns)
dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
return dataset_records
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
"""
Registers provided COCO DensePose dataset
Args:
dataset_data: CocoDatasetInfo
Dataset data
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)
def load_annotations():
return load_coco_json(
annotations_json_file=annotations_fpath,
image_root=images_root,
dataset_name=dataset_data.name,
)
DatasetCatalog.register(dataset_data.name, load_annotations)
MetadataCatalog.get(dataset_data.name).set(
json_file=annotations_fpath,
image_root=images_root,
**get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
)
def register_datasets(
datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
"""
Registers provided COCO DensePose data
Args:
datasets_data: Iterable[CocoDatasetInfo]
An iterable of dataset datas
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
for dataset_data in datasets_data:
register_dataset(dataset_data, datasets_root)
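
A hedged sketch of registering an additional COCO-format DensePose dataset with the helpers above; the dataset name and paths are hypothetical, and the module path assumes the package layout implied by the imports:

from densepose.data.datasets.coco import CocoDatasetInfo, register_dataset

my_dataset = CocoDatasetInfo(
    name="densepose_custom_train",
    images_root="custom/images",
    annotations_fpath="custom/annotations/densepose_train.json",
)
register_dataset(my_dataset, datasets_root="data")
# Once registered, "densepose_custom_train" can be listed in cfg.DATASETS.TRAIN.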

View File

@@ -0,0 +1,579 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import base64
import numpy as np
from io import BytesIO
import torch
from PIL import Image
from torch.nn import functional as F
class DensePoseTransformData(object):
# Horizontal symmetry label transforms used for horizontal flip
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
# fmt: off
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
# fmt: on
def __init__(self, uv_symmetries):
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
self.uv_symmetries = uv_symmetries
@staticmethod
def load(fpath):
import scipy.io
uv_symmetry_map = scipy.io.loadmat(fpath)
uv_symmetry_map_torch = {}
for key in ["U_transforms", "V_transforms"]:
uv_symmetry_map_torch[key] = []
map_src = uv_symmetry_map[key]
map_dst = uv_symmetry_map_torch[key]
for i in range(map_src.shape[1]):
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(
device=torch.cuda.current_device()
)
transform_data = DensePoseTransformData(uv_symmetry_map_torch)
return transform_data
class DensePoseDataRelative(object):
"""
Dense pose relative annotations that can be applied to any bounding box:
x - normalized X coordinates [0, 255] of annotated points
y - normalized Y coordinates [0, 255] of annotated points
i - body part labels 0,...,24 for annotated points
u - body part U coordinates [0, 1] for annotated points
v - body part V coordinates [0, 1] for annotated points
segm - 256x256 segmentation mask with values 0,...,14
To obtain absolute x and y data wrt some bounding box one needs to first
divide the data by 256, multiply by the respective bounding box size
and add bounding box offset:
x_img = x0 + x_norm * w / 256.0
y_img = y0 + y_norm * h / 256.0
Segmentation masks are typically sampled to get image-based masks.
"""
# Key for normalized X coordinates in annotation dict
X_KEY = "dp_x"
# Key for normalized Y coordinates in annotation dict
Y_KEY = "dp_y"
# Key for U part coordinates in annotation dict
U_KEY = "dp_U"
# Key for V part coordinates in annotation dict
V_KEY = "dp_V"
# Key for I point labels in annotation dict
I_KEY = "dp_I"
# Key for segmentation mask in annotation dict
S_KEY = "dp_masks"
# Number of body parts in segmentation masks
N_BODY_PARTS = 14
# Number of parts in point labels
N_PART_LABELS = 24
MASK_SIZE = 256
def __init__(self, annotation, cleanup=False):
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
self.device = torch.device("cpu")
if cleanup:
DensePoseDataRelative.cleanup_annotation(annotation)
def to(self, device):
if self.device == device:
return self
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
new_data.x = self.x.to(device)
new_data.y = self.y.to(device)
new_data.i = self.i.to(device)
new_data.u = self.u.to(device)
new_data.v = self.v.to(device)
new_data.segm = self.segm.to(device)
new_data.device = device
return new_data
@staticmethod
def extract_segmentation_mask(annotation):
import pycocotools.mask as mask_utils
poly_specs = annotation[DensePoseDataRelative.S_KEY]
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
for i in range(DensePoseDataRelative.N_BODY_PARTS):
poly_i = poly_specs[i]
if poly_i:
mask_i = mask_utils.decode(poly_i)
segm[mask_i > 0] = i + 1
return segm
@staticmethod
def validate_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key not in annotation:
return False, "no {key} data in the annotation".format(key=key)
return True, None
@staticmethod
def cleanup_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key in annotation:
del annotation[key]
def apply_transform(self, transforms, densepose_transform_data):
self._transform_pts(transforms, densepose_transform_data)
self._transform_segm(transforms, densepose_transform_data)
def _transform_pts(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.x = self.segm.size(1) - self.x
self._flip_iuv_semantics(dp_transform_data)
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
i_old = self.i.clone()
uv_symmetries = dp_transform_data.uv_symmetries
pt_label_symmetries = dp_transform_data.point_label_symmetries
for i in range(self.N_PART_LABELS):
if i + 1 in i_old:
annot_indices_i = i_old == i + 1
if pt_label_symmetries[i + 1] != i + 1:
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
u_loc = (self.u[annot_indices_i] * 255).long()
v_loc = (self.v[annot_indices_i] * 255).long()
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
device=self.u.device
)
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
device=self.v.device
)
def _transform_segm(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.segm = torch.flip(self.segm, [1])
self._flip_segm_semantics(dp_transform_data)
def _flip_segm_semantics(self, dp_transform_data):
old_segm = self.segm.clone()
mask_label_symmetries = dp_transform_data.mask_label_symmetries
for i in range(self.N_BODY_PARTS):
if mask_label_symmetries[i + 1] != i + 1:
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
def normalized_coords_transform(x0, y0, w, h):
"""
Coordinates transform that maps top left corner to (-1, -1) and bottom
right corner to (1, 1). Used for torch.grid_sample to initialize the
grid
"""
def f(p):
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
return f
class DensePoseOutput(object):
def __init__(self, S, I, U, V, confidences):
"""
Args:
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
"""
self.S = S
self.I = I # noqa: E741
self.U = U
self.V = V
self.confidences = confidences
self._check_output_dims(S, I, U, V)
def _check_output_dims(self, S, I, U, V):
assert (
len(S.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(I.size()) == 4
), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
I.size()
)
assert (
len(U.size()) == 4
), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
U.size()
)
assert (
len(V.size()) == 4
), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
V.size()
)
assert len(S) == len(I), (
"Number of output segmentation planes {} "
"should be equal to the number of output part index "
"planes {}".format(len(S), len(I))
)
assert S.size()[2:] == I.size()[2:], (
"Output segmentation plane size {} "
"should be equal to the output part index "
"plane size {}".format(S.size()[2:], I.size()[2:])
)
assert I.size() == U.size(), (
"Part index output shape {} "
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
)
assert I.size() == V.size(), (
"Part index output shape {} "
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
)
def resize(self, image_size_hw):
# do nothing - outputs are invariant to resize
pass
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
"""
Resample S, I, U, V from bbox_old to the cropped bbox_new
"""
x0old, y0old, wold, hold = bbox_old_xywh
x0new, y0new, wnew, hnew = bbox_new_xywh
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
topleft = (x0new, y0new)
bottomright = (x0new + wnew, y0new + hnew)
topleft_norm = tr_coords(topleft)
bottomright_norm = tr_coords(bottomright)
hsize = S.size(1)
wsize = S.size(2)
grid = torch.meshgrid(
torch.arange(
topleft_norm[1],
bottomright_norm[1],
(bottomright_norm[1] - topleft_norm[1]) / hsize,
)[:hsize],
torch.arange(
topleft_norm[0],
bottomright_norm[0],
(bottomright_norm[0] - topleft_norm[0]) / wsize,
)[:wsize],
)
grid = torch.stack(grid, dim=2).to(S.device)
assert (
grid.size(0) == hsize
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
wsize, grid.size(1)
)
S_new = F.grid_sample(
S.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
I_new = F.grid_sample(
I.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
U_new = F.grid_sample(
U.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
V_new = F.grid_sample(
V.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
return S_new, I_new, U_new, V_new
def crop(self, indices_cropped, bboxes_old, bboxes_new):
"""
Crop outputs for selected bounding boxes to the new bounding boxes.
"""
# VK: cropping is ignored for now
# for i, ic in enumerate(indices_cropped):
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
# bboxes_old[i], bboxes_new[i])
pass
def hflip(self, transform_data: DensePoseTransformData) -> None:
"""
Change S, I, U and V to take into account a Horizontal flip.
"""
if self.I.shape[0] > 0:
for el in "SIUV":
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
self._flip_iuv_semantics_tensor(transform_data)
self._flip_segm_semantics_tensor(transform_data)
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
point_label_symmetries = dp_transform_data.point_label_symmetries
uv_symmetries = dp_transform_data.uv_symmetries
N, C, H, W = self.U.shape
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
N, C - 1, H, W
)
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
device=self.U.device
)
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
device=self.V.device
)
for el in "IUV":
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
def _flip_segm_semantics_tensor(self, dp_transform_data):
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
def to_result(self, boxes_xywh):
"""
Convert DensePose outputs to results format. Results are more compact,
but cannot be resampled any more
"""
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
return result
def __getitem__(self, item):
if isinstance(item, int):
S_selected = self.S[item].unsqueeze(0)
I_selected = self.I[item].unsqueeze(0)
U_selected = self.U[item].unsqueeze(0)
V_selected = self.V[item].unsqueeze(0)
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
else:
S_selected = self.S[item]
I_selected = self.I[item]
U_selected = self.U[item]
V_selected = self.V[item]
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item]
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
def __str__(self):
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
)
s_conf = "confidences: [{}]".format(
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
)
return ", ".join([s, s_conf])
def __len__(self):
return self.S.size(0)
class DensePoseResult(object):
def __init__(self, boxes_xywh, S, I, U, V):
self.results = []
self.boxes_xywh = boxes_xywh.cpu().tolist()
assert len(boxes_xywh.size()) == 2
assert boxes_xywh.size(1) == 4
for i, box_xywh in enumerate(boxes_xywh):
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
result_numpy_i = result_i.cpu().numpy()
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
self.results.append(result_encoded_with_shape_i)
def __str__(self):
s = "DensePoseResult: N={} [{}]".format(
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
)
return s
def _output_to_result(self, box_xywh, S, I, U, V):
x, y, w, h = box_xywh
w = max(int(w), 1)
h = max(int(h), 1)
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
assert (
len(S.size()) == 4
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
assert (
len(I.size()) == 4
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(I.size()))
i_bbox = (
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (s_bbox > 0).long()
).squeeze(0)
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
4, len(U.size())
)
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
4, len(V.size())
)
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
result[0] = i_bbox
for part_id in range(1, u_bbox.size(1)):
result[1][i_bbox == part_id] = (
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
result[2][i_bbox == part_id] = (
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
assert (
result.size(1) == h
), "Results height {} should be equal " "to bounding box height {}".format(result.size(1), h)
assert (
result.size(2) == w
), "Results width {} should be equal " "to bounding box width {}".format(result.size(2), w)
return result
@staticmethod
def encode_png_data(arr):
"""
Encode array data as a PNG image using the highest compression rate
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
@return Base64-encoded string containing PNG-compressed data
"""
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
len(arr.shape)
)
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
arr.shape[0]
)
assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format(
arr.dtype
)
data = np.moveaxis(arr, 0, -1)
im = Image.fromarray(data)
fstream = BytesIO()
im.save(fstream, format="png", optimize=True)
s = base64.encodebytes(fstream.getvalue()).decode()
return s
@staticmethod
def decode_png_data(shape, s):
"""
Decode array data from a string that contains PNG-compressed data
@param Base64-encoded string containing PNG-compressed data
@return Data stored in an array of size (3, M, N) of type uint8
"""
fstream = BytesIO(base64.decodebytes(s.encode()))
im = Image.open(fstream)
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
return data.reshape(shape)
def __len__(self):
return len(self.results)
def __getitem__(self, item):
result_encoded = self.results[item]
bbox_xywh = self.boxes_xywh[item]
return result_encoded, bbox_xywh
class DensePoseList(object):
_TORCH_DEVICE_CPU = torch.device("cpu")
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
assert len(densepose_datas) == len(
boxes_xyxy_abs
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
len(densepose_datas), len(boxes_xyxy_abs)
)
self.densepose_datas = []
for densepose_data in densepose_datas:
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
"Attempt to initialize DensePoseList with DensePose datas "
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
)
densepose_data_ondevice = (
densepose_data.to(device) if densepose_data is not None else None
)
self.densepose_datas.append(densepose_data_ondevice)
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
self.image_size_hw = image_size_hw
self.device = device
def to(self, device):
if self.device == device:
return self
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
def __iter__(self):
return iter(self.densepose_datas)
def __len__(self):
return len(self.densepose_datas)
def __repr__(self):
s = self.__class__.__name__ + "("
s += "num_instances={}, ".format(len(self.densepose_datas))
s += "image_width={}, ".format(self.image_size_hw[1])
s += "image_height={})".format(self.image_size_hw[0])
return s
def __getitem__(self, item):
if isinstance(item, int):
densepose_data_rel = self.densepose_datas[item]
return densepose_data_rel
elif isinstance(item, slice):
densepose_datas_rel = self.densepose_datas[item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
else:
densepose_datas_rel = [self.densepose_datas[i] for i in item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
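
A worked example of the relative-coordinate convention documented in DensePoseDataRelative: an annotated point (dp_x, dp_y) = (128, 64) inside a bounding box (x0, y0, w, h) = (10, 20, 200, 100) maps to the following absolute image coordinates (values chosen for illustration):

x0, y0, w, h = 10.0, 20.0, 200.0, 100.0
x_norm, y_norm = 128.0, 64.0
x_img = x0 + x_norm * w / 256.0   # 10 + 128 * 200 / 256 = 110.0
y_img = y0 + y_norm * h / 256.0   # 20 +  64 * 100 / 256 = 45.0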

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import copy
import io
import itertools
import json
import logging
import os
from collections import OrderedDict
import torch
from fvcore.common.file_io import PathManager
from pycocotools.coco import COCO
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.logger import create_small_table
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
class DensePoseCOCOEvaluator(DatasetEvaluator):
def __init__(self, dataset_name, distributed, output_dir=None):
self._distributed = distributed
self._output_dir = output_dir
self._cpu_device = torch.device("cpu")
self._logger = logging.getLogger(__name__)
self._metadata = MetadataCatalog.get(dataset_name)
json_file = PathManager.get_local_path(self._metadata.json_file)
with contextlib.redirect_stdout(io.StringIO()):
self._coco_api = COCO(json_file)
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
It is a list of dict. Each dict corresponds to an image and
contains keys like "height", "width", "file_name", "image_id".
outputs: the outputs of a COCO model. It is a list of dicts with key
"instances" that contains :class:`Instances`.
The :class:`Instances` object needs to have `densepose` field.
"""
for input, output in zip(inputs, outputs):
instances = output["instances"].to(self._cpu_device)
boxes = instances.pred_boxes.tensor.clone()
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
json_results = prediction_to_json(instances, input["image_id"])
self._predictions.extend(json_results)
def evaluate(self):
if self._distributed:
synchronize()
predictions = all_gather(self._predictions)
predictions = list(itertools.chain(*predictions))
if not is_main_process():
return
else:
predictions = self._predictions
return copy.deepcopy(self._eval_predictions(predictions))
def _eval_predictions(self, predictions):
"""
Evaluate predictions on densepose.
Return results with the metrics of the tasks.
"""
self._logger.info("Preparing results for COCO format ...")
if self._output_dir:
file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
with open(file_path, "w") as f:
json.dump(predictions, f)
f.flush()
os.fsync(f.fileno())
self._logger.info("Evaluating predictions ...")
res = OrderedDict()
results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
res["densepose_gps"] = results_gps
res["densepose_gpsm"] = results_gpsm
return res
def prediction_to_json(instances, img_id):
"""
Args:
instances (Instances): the output of the model
img_id (str): the image id in COCO
Returns:
list[dict]: the results in densepose evaluation format
"""
scores = instances.scores.tolist()
results = []
for k in range(len(instances)):
densepose = instances.pred_densepose[k]
result = {
"image_id": img_id,
"category_id": 1, # densepose only has one class
"bbox": densepose[1],
"score": scores[k],
"densepose": densepose,
}
results.append(result)
return results
def _evaluate_predictions_on_coco(coco_gt, coco_results):
metrics = ["AP", "AP50", "AP75", "APm", "APl"]
logger = logging.getLogger(__name__)
if len(coco_results) == 0: # cocoapi does not handle empty results very well
logger.warning("No predictions from the model! Set scores to -1")
results_gps = {metric: -1 for metric in metrics}
results_gpsm = {metric: -1 for metric in metrics}
return results_gps, results_gpsm
coco_dt = coco_gt.loadRes(coco_results)
results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
)
results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
logger.info(
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
)
return results_gps, results_gpsm
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
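
A hedged sketch of plugging the evaluator into detectron2's standard evaluation loop, assuming a trained `model` and a `cfg` prepared as in the earlier examples; the output directory is hypothetical:

from detectron2.evaluation import inference_on_dataset
from densepose import DensePoseCOCOEvaluator
from densepose.data import build_detection_test_loader

evaluator = DensePoseCOCOEvaluator("densepose_coco_2014_minival", distributed=False, output_dir="./output")
val_loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival")
results = inference_on_dataset(model, val_loader, evaluator)
# results is an OrderedDict with "densepose_gps" and "densepose_gpsm" entries.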

View File

@@ -0,0 +1,75 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
"""
Args:
cfg (CfgNode):
model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
transform_data (DensePoseTransformData): contains symmetry label
transforms used for horizontal flip
tta_mapper (callable): takes a dataset dict and returns a list of
augmented versions of the dataset dict. Defaults to
`DatasetMapperTTA(cfg)`.
batch_size (int): batch the augmented images into this batch size for inference.
"""
self._transform_data = transform_data
super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
# the implementation follows closely the one from detectron2/modeling
def _inference_one_image(self, input):
"""
Args:
input (dict): one dataset dict
Returns:
dict: one output dict
"""
augmented_inputs, aug_vars = self._get_augmented_inputs(input)
# Detect boxes from all augmented versions
with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
# temporarily disable roi heads
all_boxes, all_scores, all_classes = self._get_augmented_boxes(
augmented_inputs, aug_vars
)
merged_instances = self._merge_detections(
all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
)
if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
# Use the detected boxes to obtain new fields
augmented_instances = self._rescale_detected_boxes(
augmented_inputs, merged_instances, aug_vars
)
# run forward on the detected boxes
outputs = self._batch_inference(
augmented_inputs, augmented_instances, do_postprocess=False
)
# Delete now useless variables to avoid being out of memory
del augmented_inputs, augmented_instances, merged_instances
# average the predictions
if self.cfg.MODEL.MASK_ON:
outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
if self.cfg.MODEL.DENSEPOSE_ON:
outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
# postprocess
output = self._detector_postprocess(outputs[0], aug_vars)
return {"instances": output}
else:
return {"instances": merged_instances}
def _reduce_pred_densepose(self, outputs, aug_vars):
for idx, output in enumerate(outputs):
if aug_vars["do_hflip"][idx]:
output.pred_densepose.hflip(self._transform_data)
# Less memory-intensive averaging
for attr in "SIUV":
setattr(
outputs[0].pred_densepose,
attr,
sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
)
return outputs[0].pred_densepose
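# Minimal usage sketch (illustrative, not part of the original file): wrapping a trained
# model with the TTA class above. `cfg` and `model` are assumed to come from the usual
# detectron2 setup; `load_from_cfg` is the helper shipped in densepose/utils/transform.py
# and provides the symmetry label transforms used for horizontal flips.
def _example_build_tta_model(cfg, model):
    from densepose.utils.transform import load_from_cfg
    transform_data = load_from_cfg(cfg)
    return DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, batch_size=1)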

View File

@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
from typing import Dict
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn as nn
from torch.nn import functional as F
from detectron2.layers import Conv2d, ShapeSpec, get_norm
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import select_foreground_proposals
from .densepose_head import (
build_densepose_data_filter,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
densepose_inference,
)
class Decoder(nn.Module):
"""
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
all levels of the FPN into single output.
"""
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
super(Decoder, self).__init__()
# fmt: off
self.in_features = in_features
feature_strides = {k: v.stride for k, v in input_shape.items()}
feature_channels = {k: v.channels for k, v in input_shape.items()}
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
# fmt: on
self.scale_heads = []
for in_feature in self.in_features:
head_ops = []
head_length = max(
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
)
for k in range(head_length):
conv = Conv2d(
feature_channels[in_feature] if k == 0 else conv_dims,
conv_dims,
kernel_size=3,
stride=1,
padding=1,
bias=not norm,
norm=get_norm(norm, conv_dims),
activation=F.relu,
)
weight_init.c2_msra_fill(conv)
head_ops.append(conv)
if feature_strides[in_feature] != self.common_stride:
head_ops.append(
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
)
self.scale_heads.append(nn.Sequential(*head_ops))
self.add_module(in_feature, self.scale_heads[-1])
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
weight_init.c2_msra_fill(self.predictor)
def forward(self, features):
for i, _ in enumerate(self.in_features):
if i == 0:
x = self.scale_heads[i](features[i])
else:
x = x + self.scale_heads[i](features[i])
x = self.predictor(x)
return x
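# Illustrative sketch (not part of the original file): instantiating the Decoder above on
# FPN-like inputs. The strides/channels are assumptions matching a standard ResNet-FPN
# ("p2".."p5", stride 4..32, 256 channels); `cfg` is assumed to already contain the
# ROI_DENSEPOSE_HEAD.DECODER_* options added by add_densepose_config.
def _example_build_decoder(cfg):
    in_features = ["p2", "p3", "p4", "p5"]
    input_shape = {f"p{i}": ShapeSpec(channels=256, stride=2 ** i) for i in range(2, 6)}
    decoder = Decoder(cfg, input_shape, in_features)
    # forward expects features in the same order as in_features:
    # mask_logits = decoder([feat_p2, feat_p3, feat_p4, feat_p5])
    return decoder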
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
"""
    A standard ROIHeads that additionally contains a DensePose head.
"""
def __init__(self, cfg, input_shape):
super().__init__(cfg, input_shape)
self._init_densepose_head(cfg, input_shape)
def _init_densepose_head(self, cfg, input_shape):
# fmt: off
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
if not self.densepose_on:
return
self.densepose_data_filter = build_densepose_data_filter(cfg)
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
# fmt: on
if self.use_decoder:
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
else:
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
in_channels = [input_shape[f].channels for f in self.in_features][0]
if self.use_decoder:
self.decoder = Decoder(cfg, input_shape, self.in_features)
self.densepose_pooler = ROIPooler(
output_size=dp_pooler_resolution,
scales=dp_pooler_scales,
sampling_ratio=dp_pooler_sampling_ratio,
pooler_type=dp_pooler_type,
)
self.densepose_head = build_densepose_head(cfg, in_channels)
self.densepose_predictor = build_densepose_predictor(
cfg, self.densepose_head.n_out_channels
)
self.densepose_losses = build_densepose_losses(cfg)
def _forward_densepose(self, features, instances):
"""
Forward logic of the densepose prediction branch.
Args:
features (list[Tensor]): #level input features for densepose prediction
instances (list[Instances]): the per-image instances to train/predict densepose.
In training, they can be the proposals.
In inference, they can be the predicted boxes.
Returns:
In training, a dict of losses.
In inference, update `instances` with new fields "densepose" and return it.
"""
if not self.densepose_on:
return {} if self.training else instances
features = [features[f] for f in self.in_features]
if self.training:
proposals, _ = select_foreground_proposals(instances, self.num_classes)
proposals_dp = self.densepose_data_filter(proposals)
if len(proposals_dp) > 0:
# NOTE may deadlock in DDP if certain workers have empty proposals_dp
proposal_boxes = [x.proposal_boxes for x in proposals_dp]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, proposal_boxes)
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
densepose_loss_dict = self.densepose_losses(
proposals_dp, densepose_outputs, confidences
)
return densepose_loss_dict
else:
pred_boxes = [x.pred_boxes for x in instances]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, pred_boxes)
if len(features_dp) > 0:
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
else:
                # If no detections occurred, set densepose outputs
                # and confidences to empty tensors
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
densepose_outputs = tuple([empty_tensor] * 4)
confidences = tuple([empty_tensor] * 4)
densepose_inference(densepose_outputs, confidences, instances)
return instances
def forward(self, images, features, proposals, targets=None):
instances, losses = super().forward(images, features, proposals, targets)
del targets, images
if self.training:
losses.update(self._forward_densepose(features, instances))
return instances, losses
def forward_with_given_boxes(self, features, instances):
"""
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
        This is useful for downstream tasks where a box is known, but one needs to obtain
        other attributes (outputs of other heads).
Test-time augmentation also uses this.
Args:
features: same as in `forward()`
instances (list[Instances]): instances to predict other outputs. Expect the keys
"pred_boxes" and "pred_classes" to exist.
Returns:
instances (list[Instances]):
the same `Instances` objects, with extra
fields such as `pred_masks` or `pred_keypoints`.
"""
instances = super().forward_with_given_boxes(features, instances)
instances = self._forward_densepose(features, instances)
return instances
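# Illustrative sketch (not part of the original file): the registered heads above are
# selected through the regular detectron2 config machinery. `config_fpath` is an assumed
# path to one of the DensePose config files; in those configs MODEL.ROI_HEADS.NAME is
# already set to "DensePoseROIHeads".
def _example_build_model(config_fpath):
    from detectron2.config import get_cfg
    from detectron2.modeling import build_model
    from densepose import add_densepose_config
    cfg = get_cfg()
    add_densepose_config(cfg)
    cfg.merge_from_file(config_fpath)
    return build_model(cfg)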

View File

@@ -0,0 +1,145 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Any, Dict, Optional, Tuple
class EntrySelector(object):
"""
Base class for entry selectors
"""
@staticmethod
def from_string(spec: str) -> "EntrySelector":
if spec == "*":
return AllEntrySelector()
return FieldEntrySelector(spec)
class AllEntrySelector(EntrySelector):
"""
Selector that accepts all entries
"""
SPECIFIER = "*"
def __call__(self, entry):
return True
class FieldEntrySelector(EntrySelector):
"""
    Selector that accepts only entries that match the provided field
    specifier(s). Only a limited set of specifiers is supported for now:
<specifiers>::=<specifier>[<comma><specifiers>]
<specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
<field_name> is a valid identifier
<type> ::= "int" | "str"
<equal> ::= "="
<comma> ::= ","
<type_delim> ::= ":"
<value_or_range> ::= <value> | <range>
<range> ::= <value><range_delim><value>
<range_delim> ::= "-"
<value> is a string without spaces and special symbols
(e.g. <comma>, <equal>, <type_delim>, <range_delim>)
"""
_SPEC_DELIM = ","
_TYPE_DELIM = ":"
_RANGE_DELIM = "-"
_EQUAL = "="
_ERROR_PREFIX = "Invalid field selector specifier"
class _FieldEntryValuePredicate(object):
"""
Predicate that checks strict equality for the specified entry field
"""
def __init__(self, name: str, typespec: str, value: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.value = value
def __call__(self, entry):
return entry[self.name] == self.type(self.value)
class _FieldEntryRangePredicate(object):
"""
Predicate that checks whether an entry field falls into the specified range
"""
def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
import builtins
self.name = name
self.type = getattr(builtins, typespec) if typespec is not None else str
self.vmin = vmin
self.vmax = vmax
def __call__(self, entry):
return (entry[self.name] >= self.type(self.vmin)) and (
entry[self.name] <= self.type(self.vmax)
)
def __init__(self, spec: str):
self._predicates = self._parse_specifier_into_predicates(spec)
def __call__(self, entry: Dict[str, Any]):
for predicate in self._predicates:
if not predicate(entry):
return False
return True
def _parse_specifier_into_predicates(self, spec: str):
predicates = []
specs = spec.split(self._SPEC_DELIM)
for subspec in specs:
eq_idx = subspec.find(self._EQUAL)
if eq_idx > 0:
field_name_with_type = subspec[:eq_idx]
field_name, field_type = self._parse_field_name_type(field_name_with_type)
field_value_or_range = subspec[eq_idx + 1 :]
if self._is_range_spec(field_value_or_range):
vmin, vmax = self._get_range_spec(field_value_or_range)
predicate = FieldEntrySelector._FieldEntryRangePredicate(
field_name, field_type, vmin, vmax
)
else:
predicate = FieldEntrySelector._FieldEntryValuePredicate(
field_name, field_type, field_value_or_range
)
predicates.append(predicate)
elif eq_idx == 0:
self._parse_error(f'"{subspec}", field name is empty!')
else:
self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
return predicates
def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
if type_delim_idx > 0:
field_name = field_name_with_type[:type_delim_idx]
field_type = field_name_with_type[type_delim_idx + 1 :]
elif type_delim_idx == 0:
self._parse_error(f'"{field_name_with_type}", field name is empty!')
else:
field_name = field_name_with_type
field_type = None
return field_name, field_type
def _is_range_spec(self, field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
return delim_idx > 0
def _get_range_spec(self, field_value_or_range):
if self._is_range_spec(field_value_or_range):
delim_idx = field_value_or_range.find(self._RANGE_DELIM)
vmin = field_value_or_range[:delim_idx]
vmax = field_value_or_range[delim_idx + 1 :]
return vmin, vmax
else:
            self._parse_error(f'"{field_value_or_range}", range of values expected!')
def _parse_error(self, msg):
raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
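# Illustrative usage sketch (not part of the original file): selecting entries with the
# specifier grammar documented above. The field names in the sample entries are arbitrary.
def _example_select_entries():
    selector = EntrySelector.from_string("category_id:int=1,image_id:int=100-199")
    entries = [
        {"category_id": 1, "image_id": 150},  # accepted
        {"category_id": 1, "image_id": 250},  # rejected: image_id out of range
        {"category_id": 2, "image_id": 120},  # rejected: wrong category
    ]
    return [entry for entry in entries if selector(entry)]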

View File

@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
def verbosity_to_level(verbosity):
if verbosity is not None:
if verbosity == 0:
return logging.WARNING
elif verbosity == 1:
return logging.INFO
elif verbosity >= 2:
return logging.DEBUG
return logging.WARNING
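# Illustrative sketch (not part of the original file): mapping a "-v/-vv" style count,
# e.g. from argparse's `add_argument("-v", "--verbosity", action="count")`, to a level.
def _example_setup_logging(verbosity):
    logging.basicConfig(level=verbosity_to_level(verbosity))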

View File

@@ -0,0 +1,16 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from densepose import DensePoseTransformData
def load_for_dataset(dataset_name):
path = MetadataCatalog.get(dataset_name).densepose_transform_src
densepose_transform_data_fpath = PathManager.get_local_path(path)
return DensePoseTransformData.load(densepose_transform_data_fpath)
def load_from_cfg(cfg):
return load_for_dataset(cfg.DATASETS.TEST[0])

View File

@@ -0,0 +1,191 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
import cv2
import torch
Image = np.ndarray
Boxes = torch.Tensor
class MatrixVisualizer(object):
"""
Base visualizer for matrix data
"""
def __init__(
self,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
val_scale=1.0,
alpha=0.7,
interp_method_matrix=cv2.INTER_LINEAR,
interp_method_mask=cv2.INTER_NEAREST,
):
self.inplace = inplace
self.cmap = cmap
self.val_scale = val_scale
self.alpha = alpha
self.interp_method_matrix = interp_method_matrix
self.interp_method_mask = interp_method_mask
def visualize(self, image_bgr, mask, matrix, bbox_xywh):
self._check_image(image_bgr)
self._check_mask_matrix(mask, matrix)
if self.inplace:
image_target_bgr = image_bgr
else:
image_target_bgr = image_bgr * 0
x, y, w, h = [int(v) for v in bbox_xywh]
if w <= 0 or h <= 0:
return image_bgr
mask, matrix = self._resize(mask, matrix, w, h)
mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
matrix_scaled = matrix.astype(np.float32) * self.val_scale
_EPSILON = 1e-6
if np.any(matrix_scaled > 255 + _EPSILON):
logger = logging.getLogger(__name__)
logger.warning(
f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
)
matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
image_target_bgr[y : y + h, x : x + w, :] = (
image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
)
return image_target_bgr.astype(np.uint8)
def _resize(self, mask, matrix, w, h):
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            # pass interpolation by keyword: the third positional argument of cv2.resize is dst
            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
return mask, matrix
def _check_image(self, image_rgb):
assert len(image_rgb.shape) == 3
assert image_rgb.shape[2] == 3
assert image_rgb.dtype == np.uint8
def _check_mask_matrix(self, mask, matrix):
assert len(matrix.shape) == 2
assert len(mask.shape) == 2
assert mask.dtype == np.uint8
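# Illustrative sketch (not part of the original file): overlaying a per-box scalar matrix
# (here a synthetic 24-part index map) onto an image with the MatrixVisualizer above.
def _example_matrix_overlay():
    image_bgr = np.zeros((240, 320, 3), dtype=np.uint8)
    matrix = np.random.randint(0, 25, size=(56, 56)).astype(np.uint8)
    mask = (matrix > 0).astype(np.uint8)
    visualizer = MatrixVisualizer(val_scale=255.0 / 24)
    return visualizer.visualize(image_bgr, mask, matrix, bbox_xywh=[40, 30, 112, 112])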
class RectangleVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color=_COLOR_GREEN, thickness=1):
self.color = color
self.thickness = thickness
def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
x, y, w, h = bbox_xywh
color = color or self.color
thickness = thickness or self.thickness
cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
return image_bgr
class PointsVisualizer(object):
_COLOR_GREEN = (18, 127, 15)
def __init__(self, color_bgr=_COLOR_GREEN, r=5):
self.color_bgr = color_bgr
self.r = r
def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
for j, pt_xy in enumerate(pts_xy):
x, y = pt_xy
color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
r = rs[j] if rs is not None else self.r
            cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)
return image_bgr
class TextVisualizer(object):
_COLOR_GRAY = (218, 227, 218)
_COLOR_WHITE = (255, 255, 255)
def __init__(
self,
font_face=cv2.FONT_HERSHEY_SIMPLEX,
font_color_bgr=_COLOR_GRAY,
font_scale=0.35,
font_line_type=cv2.LINE_AA,
font_line_thickness=1,
fill_color_bgr=_COLOR_WHITE,
fill_color_transparency=1.0,
frame_color_bgr=_COLOR_WHITE,
frame_color_transparency=1.0,
frame_thickness=1,
):
self.font_face = font_face
self.font_color_bgr = font_color_bgr
self.font_scale = font_scale
self.font_line_type = font_line_type
self.font_line_thickness = font_line_thickness
self.fill_color_bgr = fill_color_bgr
self.fill_color_transparency = fill_color_transparency
self.frame_color_bgr = frame_color_bgr
self.frame_color_transparency = frame_color_transparency
self.frame_thickness = frame_thickness
def visualize(self, image_bgr, txt, topleft_xy):
txt_w, txt_h = self.get_text_size_wh(txt)
topleft_xy = tuple(map(int, topleft_xy))
x, y = topleft_xy
if self.frame_color_transparency < 1.0:
t = self.frame_thickness
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
* self.frame_color_transparency
+ np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
            ).astype(float)
if self.fill_color_transparency < 1.0:
image_bgr[y : y + txt_h, x : x + txt_w, :] = (
image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+ np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
            ).astype(float)
cv2.putText(
image_bgr,
txt,
topleft_xy,
self.font_face,
self.font_scale,
self.font_color_bgr,
self.font_line_thickness,
self.font_line_type,
)
return image_bgr
def get_text_size_wh(self, txt):
((txt_w, txt_h), _) = cv2.getTextSize(
txt, self.font_face, self.font_scale, self.font_line_thickness
)
return txt_w, txt_h
class CompoundVisualizer(object):
def __init__(self, visualizers):
self.visualizers = visualizers
def visualize(self, image_bgr, data):
assert len(data) == len(
self.visualizers
), "The number of datas {} should match the number of visualizers" " {}".format(
len(data), len(self.visualizers)
)
image = image_bgr
for i, visualizer in enumerate(self.visualizers):
image = visualizer.visualize(image, data[i])
return image
def __str__(self):
visualizer_str = ", ".join([str(v) for v in self.visualizers])
return "Compound Visualizer [{}]".format(visualizer_str)

View File

@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .base import RectangleVisualizer, TextVisualizer
class BoundingBoxVisualizer(object):
def __init__(self):
self.rectangle_visualizer = RectangleVisualizer()
def visualize(self, image_bgr, boxes_xywh):
for bbox_xywh in boxes_xywh:
image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
return image_bgr
class ScoredBoundingBoxVisualizer(object):
def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
if bbox_visualizer_params is None:
bbox_visualizer_params = {}
if score_visualizer_params is None:
score_visualizer_params = {}
self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
self.visualizer_score = TextVisualizer(**score_visualizer_params)
def visualize(self, image_bgr, scored_bboxes):
boxes_xywh, box_scores = scored_bboxes
assert len(boxes_xywh) == len(
box_scores
), "Number of bounding boxes {} should be equal to the number of scores {}".format(
len(boxes_xywh), len(box_scores)
)
for i, box_xywh in enumerate(boxes_xywh):
score_i = box_scores[i]
image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
score_txt = "{0:6.4f}".format(score_i)
topleft_xy = box_xywh[0], box_xywh[1]
image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
return image_bgr

View File

@@ -0,0 +1,593 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Iterable, Optional, Tuple
import cv2
from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
class DensePoseResultsVisualizer(object):
def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
if densepose_result is None:
return image_bgr
context = self.create_visualization_context(image_bgr)
for i, result_encoded_w_shape in enumerate(densepose_result.results):
iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape)
bbox_xywh = densepose_result.boxes_xywh[i]
self.visualize_iuv_arr(context, iuv_arr, bbox_xywh)
image_bgr = self.context_to_image_bgr(context)
return image_bgr
class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
def __init__(
self,
data_extractor,
segm_extractor,
inplace=True,
cmap=cv2.COLORMAP_PARULA,
alpha=0.7,
val_scale=1.0,
):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
)
self.data_extractor = data_extractor
self.segm_extractor = segm_extractor
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
image_bgr = self.get_image_bgr_from_context(context)
matrix = self.data_extractor(iuv_arr)
segm = self.segm_extractor(iuv_arr)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[segm > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
def _extract_i_from_iuvarr(iuv_arr):
return iuv_arr[0, :, :]
def _extract_u_from_iuvarr(iuv_arr):
return iuv_arr[1, :, :]
def _extract_v_from_iuvarr(iuv_arr):
return iuv_arr[2, :, :]
class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
def __init__(self, levels=10, **kwargs):
self.levels = levels
self.plot_args = kwargs
def create_visualization_context(self, image_bgr: Image):
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
context = {}
context["image_bgr"] = image_bgr
dpi = 100
height_inches = float(image_bgr.shape[0]) / dpi
width_inches = float(image_bgr.shape[1]) / dpi
fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
plt.axes([0, 0, 1, 1])
plt.axis("off")
context["fig"] = fig
canvas = FigureCanvas(fig)
context["canvas"] = canvas
extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
plt.imshow(image_bgr[:, :, ::-1], extent=extent)
return context
def context_to_image_bgr(self, context):
fig = context["fig"]
w, h = map(int, fig.get_size_inches() * fig.get_dpi())
canvas = context["canvas"]
canvas.draw()
        image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
image_rgb = image_1d.reshape(h, w, 3)
image_bgr = image_rgb[:, :, ::-1].copy()
return image_bgr
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
import matplotlib.pyplot as plt
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
extent = (
bbox_xywh[0],
bbox_xywh[0] + bbox_xywh[2],
bbox_xywh[1],
bbox_xywh[1] + bbox_xywh[3],
)
plt.contour(u, self.levels, extent=extent, **self.plot_args)
plt.contour(v, self.levels, extent=extent, **self.plot_args)
class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
"""
Contour visualization using marching squares
"""
def __init__(self, levels=10, **kwargs):
# TODO: colormap is hardcoded
cmap = cv2.COLORMAP_PARULA
if isinstance(levels, int):
self.levels = np.linspace(0, 1, levels)
else:
self.levels = levels
if "linewidths" in kwargs:
self.linewidths = kwargs["linewidths"]
else:
self.linewidths = [1] * len(self.levels)
self.plot_args = kwargs
img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
self.level_colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
def create_visualization_context(self, image_bgr: Image):
return image_bgr
def context_to_image_bgr(self, context):
return context
def get_image_bgr_from_context(self, context):
return context
def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image:
image_bgr = self.get_image_bgr_from_context(context)
segm = _extract_i_from_iuvarr(iuv_arr)
u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
self._contours(image_bgr, u, segm, bbox_xywh)
self._contours(image_bgr, v, segm, bbox_xywh)
def _contours(self, image_bgr, arr, segm, bbox_xywh):
for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
mask = segm == part_idx
if not np.any(mask):
continue
arr_min = np.amin(arr[mask])
arr_max = np.amax(arr[mask])
I, J = np.nonzero(mask)
i0 = np.amin(I)
i1 = np.amax(I) + 1
j0 = np.amin(J)
j1 = np.amax(J) + 1
if (j1 == j0 + 1) or (i1 == i0 + 1):
continue
Nw = arr.shape[1] - 1
Nh = arr.shape[0] - 1
for level_idx, level in enumerate(self.levels):
if (level < arr_min) or (level > arr_max):
continue
vp = arr[i0:i1, j0:j1] >= level
bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
mp = mask[i0:i1, j0:j1]
bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
it = np.nditer(bin_codes, flags=["multi_index"])
color_bgr = self.level_colors_bgr[level_idx]
linewidth = self.linewidths[level_idx]
while not it.finished:
if (it[0] != 0) and (it[0] != 15):
i, j = it.multi_index
if bin_mask_codes[i, j] != 0:
self._draw_line(
image_bgr,
arr,
mask,
level,
color_bgr,
linewidth,
it[0],
it.multi_index,
bbox_xywh,
Nw,
Nh,
(i0, j0),
)
it.iternext()
def _draw_line(
self,
image_bgr,
arr,
mask,
v,
color_bgr,
linewidth,
bin_code,
multi_idx,
bbox_xywh,
Nw,
Nh,
offset,
):
lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
x0, y0, w, h = bbox_xywh
x1 = x0 + w
y1 = y0 + h
for line in lines:
x0r, y0r = line[0]
x1r, y1r = line[1]
pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
i0, j0 = offset
i, j = multi_idx
i += i0
j += j0
v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
x0i = float(j) / Nw
y0j = float(i) / Nh
He = 1.0 / Nh
We = 1.0 / Nw
if (bin_code == 1) or (bin_code == 14):
a = (v - v0) / (v1 - v0)
b = (v - v0) / (v3 - v0)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j)
return [(pt1, pt2)]
elif (bin_code == 2) or (bin_code == 13):
a = (v - v0) / (v1 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 3) or (bin_code == 12):
a = (v - v0) / (v3 - v0)
b = (v - v1) / (v2 - v1)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + b * We, y0j + He)
return [(pt1, pt2)]
elif (bin_code == 4) or (bin_code == 11):
a = (v - v1) / (v2 - v1)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j + He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 6) or (bin_code == 9):
a = (v - v0) / (v1 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i, y0j + a * He)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif (bin_code == 7) or (bin_code == 8):
a = (v - v0) / (v3 - v0)
b = (v - v3) / (v2 - v3)
pt1 = (x0i + a * We, y0j)
pt2 = (x0i + We, y0j + b * He)
return [(pt1, pt2)]
elif bin_code == 5:
a1 = (v - v0) / (v1 - v0)
b1 = (v - v1) / (v2 - v1)
pt11 = (x0i, y0j + a1 * He)
pt12 = (x0i + b1 * We, y0j + He)
a2 = (v - v0) / (v3 - v0)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
elif bin_code == 10:
a1 = (v - v0) / (v3 - v0)
b1 = (v - v0) / (v1 - v0)
pt11 = (x0i + a1 * We, y0j)
pt12 = (x0i, y0j + b1 * He)
a2 = (v - v1) / (v2 - v1)
b2 = (v - v3) / (v2 - v3)
pt21 = (x0i + a2 * We, y0j + He)
pt22 = (x0i + We, y0j + b2 * He)
return [(pt11, pt12), (pt21, pt22)]
return []
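# Illustrative sketch (not part of the original file): drawing U/V iso-contours for a
# synthetic IUV array with the matplotlib-free visualizer above. The IUV layout
# (channel 0: part index, channels 1-2: U/V scaled to [0, 255]) matches
# DensePoseResult.decode_png_data.
def _example_custom_contours():
    image_bgr = np.zeros((200, 200, 3), dtype=np.uint8)
    iuv_arr = np.zeros((3, 80, 80), dtype=np.uint8)
    iuv_arr[0] = 1  # a single body part covering the whole box
    iuv_arr[1] = np.tile(np.linspace(0, 255, 80), (80, 1))    # U ramp along x
    iuv_arr[2] = np.tile(np.linspace(0, 255, 80), (80, 1)).T  # V ramp along y
    visualizer = DensePoseResultsCustomContourVisualizer(levels=5)
    visualizer.visualize_iuv_arr(image_bgr, iuv_arr, [20, 20, 80, 80])
    return image_bgr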
try:
import matplotlib
matplotlib.use("Agg")
DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
logger = logging.getLogger(__name__)
logger.warning("Could not import matplotlib, using custom contour visualizer")
DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
_extract_i_from_iuvarr,
_extract_i_from_iuvarr,
inplace,
cmap,
alpha,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
)
class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsUVisualizer, self).__init__(
_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
super(DensePoseResultsVVisualizer, self).__init__(
_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0
)
class DensePoseOutputsFineSegmentationVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
alpha=alpha,
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
matrix = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
return image_bgr
class DensePoseOutputsUVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Un = U[n].cpu().numpy().astype(np.float32)
Uvis = np.zeros(segmentation.shape, dtype=np.float32)
for partId in range(Un.shape[0]):
Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
return image_bgr
class DensePoseOutputsVVisualizer(object):
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
)
def visualize(
self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
) -> Image:
if dp_output_with_bboxes is None:
return image_bgr
densepose_output, bboxes_xywh = dp_output_with_bboxes
assert isinstance(
densepose_output, DensePoseOutput
), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
S = densepose_output.S
I = densepose_output.I # noqa
U = densepose_output.U
V = densepose_output.V
N = S.size(0)
assert N == I.size(
0
), "densepose outputs S {} and I {}" " should have equal first dim size".format(
S.size(), I.size()
)
assert N == U.size(
0
), "densepose outputs S {} and U {}" " should have equal first dim size".format(
S.size(), U.size()
)
assert N == V.size(
0
), "densepose outputs S {} and V {}" " should have equal first dim size".format(
S.size(), V.size()
)
assert N == len(
bboxes_xywh
), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
len(bboxes_xywh), N
)
for n in range(N):
Sn = S[n].argmax(dim=0)
In = I[n].argmax(dim=0) * (Sn > 0).long()
segmentation = In.cpu().numpy().astype(np.uint8)
mask = np.zeros(segmentation.shape, dtype=np.uint8)
mask[segmentation > 0] = 1
Vn = V[n].cpu().numpy().astype(np.float32)
Vvis = np.zeros(segmentation.shape, dtype=np.float32)
            for partId in range(Vn.shape[0]):
Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255
bbox_xywh = bboxes_xywh[n]
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
return image_bgr
class DensePoseDataCoarseSegmentationVisualizer(object):
"""
Visualizer for ground truth segmentation
"""
def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
self.mask_visualizer = MatrixVisualizer(
inplace=inplace,
cmap=cmap,
val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
alpha=alpha,
)
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
matrix = densepose_data.segm.numpy()
mask = np.zeros(matrix.shape, dtype=np.uint8)
mask[matrix > 0] = 1
image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
return image_bgr
class DensePoseDataPointsVisualizer(object):
def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
self.points_visualizer = PointsVisualizer()
self.densepose_data_to_value_fn = densepose_data_to_value_fn
self.cmap = cmap
def visualize(
self,
image_bgr: Image,
bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
) -> Image:
if bbox_densepose_datas is None:
return image_bgr
for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
x0, y0, w, h = bbox_xywh.numpy()
x = densepose_data.x.numpy() * w / 255.0 + x0
y = densepose_data.y.numpy() * h / 255.0 + y0
pts_xy = zip(x, y)
if self.densepose_data_to_value_fn is None:
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
else:
v = self.densepose_data_to_value_fn(densepose_data)
img_colors_bgr = cv2.applyColorMap(v, self.cmap)
colors_bgr = [
[int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
]
image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
return image_bgr
def _densepose_data_u_for_cmap(densepose_data):
u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
return u.astype(np.uint8)
def _densepose_data_v_for_cmap(densepose_data):
v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
return v.astype(np.uint8)
def _densepose_data_i_for_cmap(densepose_data):
i = (
np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
* 255.0
/ DensePoseDataRelative.N_PART_LABELS
)
return i.astype(np.uint8)
class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsUVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_u_for_cmap
)
class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsVVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_v_for_cmap
)
class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
def __init__(self):
super(DensePoseDataPointsIVisualizer, self).__init__(
densepose_data_to_value_fn=_densepose_data_i_for_cmap
)

View File

@@ -0,0 +1,152 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from typing import Sequence
import torch
from detectron2.layers.nms import batched_nms
from detectron2.structures.instances import Instances
from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
from densepose.vis.densepose import DensePoseResultsVisualizer
from .base import CompoundVisualizer
Scores = Sequence[float]
def extract_scores_from_instances(instances: Instances, select=None):
if instances.has("scores"):
return instances.scores if select is None else instances.scores[select]
return None
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
if instances.has("pred_boxes"):
boxes_xywh = instances.pred_boxes.tensor.clone()
boxes_xywh[:, 2] -= boxes_xywh[:, 0]
boxes_xywh[:, 3] -= boxes_xywh[:, 1]
return boxes_xywh if select is None else boxes_xywh[select]
return None
def create_extractor(visualizer: object):
"""
Create an extractor for the provided visualizer
"""
if isinstance(visualizer, CompoundVisualizer):
extractors = [create_extractor(v) for v in visualizer.visualizers]
return CompoundExtractor(extractors)
elif isinstance(visualizer, DensePoseResultsVisualizer):
return DensePoseResultExtractor()
elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
elif isinstance(visualizer, BoundingBoxVisualizer):
return extract_boxes_xywh_from_instances
else:
logger = logging.getLogger(__name__)
logger.error(f"Could not create extractor for {visualizer}")
return None
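# Illustrative sketch (not part of the original file): the usual pairing of a visualizer
# with its extractor: the extractor pulls the relevant fields out of the predicted
# `Instances`, the visualizer renders them onto the image.
def _example_visualize(instances: Instances, image_bgr):
    from densepose.vis.densepose import DensePoseResultsFineSegmentationVisualizer
    visualizer = DensePoseResultsFineSegmentationVisualizer()
    extractor = create_extractor(visualizer)
    data = extractor(instances)
    return visualizer.visualize(image_bgr, data)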
class BoundingBoxExtractor(object):
"""
Extracts bounding boxes from instances
"""
def __call__(self, instances: Instances):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
return boxes_xywh
class ScoredBoundingBoxExtractor(object):
"""
    Extracts bounding boxes and corresponding scores from instances
"""
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if (scores is None) or (boxes_xywh is None):
return (boxes_xywh, scores)
if select is not None:
scores = scores[select]
boxes_xywh = boxes_xywh[select]
return (boxes_xywh, scores)
class DensePoseResultExtractor(object):
"""
Extracts DensePose result from instances
"""
def __call__(self, instances: Instances, select=None):
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if instances.has("pred_densepose") and (boxes_xywh is not None):
dpout = instances.pred_densepose
if select is not None:
dpout = dpout[select]
boxes_xywh = boxes_xywh[select]
return dpout.to_result(boxes_xywh)
else:
return None
class CompoundExtractor(object):
"""
Extracts data for CompoundVisualizer
"""
def __init__(self, extractors):
self.extractors = extractors
def __call__(self, instances: Instances, select=None):
datas = []
for extractor in self.extractors:
data = extractor(instances, select)
datas.append(data)
return datas
class NmsFilteredExtractor(object):
"""
    Extracts data for detections kept after non-maximum suppression (NMS) filtering
"""
def __init__(self, extractor, iou_threshold):
self.extractor = extractor
self.iou_threshold = iou_threshold
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
boxes_xywh = extract_boxes_xywh_from_instances(instances)
if boxes_xywh is None:
return None
select_local_idx = batched_nms(
boxes_xywh,
scores,
            torch.zeros(len(scores), dtype=torch.int32, device=boxes_xywh.device),
iou_threshold=self.iou_threshold,
).squeeze()
select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
select_local[select_local_idx] = True
select = select_local if select is None else (select & select_local)
return self.extractor(instances, select=select)
class ScoreThresholdedExtractor(object):
"""
    Extracts data for detections whose score exceeds the given threshold
"""
def __init__(self, extractor, min_score):
self.extractor = extractor
self.min_score = min_score
def __call__(self, instances: Instances, select=None):
scores = extract_scores_from_instances(instances)
if scores is None:
return None
select_local = scores > self.min_score
select = select_local if select is None else (select & select_local)
data = self.extractor(instances, select=select)
return data
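# Illustrative sketch (not part of the original file): extractors can be chained; here
# detections are thresholded by score and filtered by NMS before DensePose results are
# extracted. The thresholds are arbitrary example values.
def _example_filtered_extractor():
    extractor = NmsFilteredExtractor(
        ScoreThresholdedExtractor(DensePoseResultExtractor(), min_score=0.8),
        iou_threshold=0.5,
    )
    return extractor  # use as: data = extractor(instances)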