Add at new repo again

2025-01-28 21:48:35 +00:00
commit 6e660ddb3c
564 changed files with 75575 additions and 0 deletions
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md
@@ -0,0 +1,54 @@
+# DensePose in Detectron2
+**Dense Human Pose Estimation In The Wild**
+
+_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
+
+[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
+
+Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
+
+<div align="center">
+  <img src="https://drive.google.com/uc?export=view&id=1qfSOkpueo1kVZbXOuQJJhyagKjMgepsz" width="700px" />
+</div>
+
+In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
+DensePose annotation and results.
+
+# Quick Start
+
+See [ Getting Started ](doc/GETTING_STARTED.md)
+
+# Model Zoo and Baselines
+
+We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details.
+
+# License
+
+Detectron2 is released under the [Apache 2.0 license](../../LICENSE)
+
+## <a name="CitingDensePose"></a>Citing DensePose
+
+If you use DensePose, please cite it using the following BibTeX entries:
+
+For DensePose with estimated confidences:
+
+```
+@InProceedings{Neverova2019DensePoseConfidences,
+    title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels},
+    author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea},
+    booktitle = {Advances in Neural Information Processing Systems},
+    year = {2019},
+}
+```
+
+For the original DensePose:
+
+```
+@InProceedings{Guler2018DensePose,
+  title={DensePose: Dense Human Pose Estimation In The Wild},
+  author={R{\i}za Alp G{\"u}ler and Natalia Neverova and Iasonas Kokkinos},
+  booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2018}
+}
+```
+
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py
@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import argparse
+import glob
+import logging
+import os
+import pickle
+import sys
+from typing import Any, ClassVar, Dict, List
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.structures.boxes import BoxMode
+from detectron2.structures.instances import Instances
+from detectron2.utils.logger import setup_logger
+
+from densepose import add_densepose_config
+from densepose.utils.logger import verbosity_to_level
+from densepose.vis.base import CompoundVisualizer
+from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer
+from densepose.vis.densepose import (
+    DensePoseResultsContourVisualizer,
+    DensePoseResultsFineSegmentationVisualizer,
+    DensePoseResultsUVisualizer,
+    DensePoseResultsVVisualizer,
+)
+from densepose.vis.extractor import CompoundExtractor, create_extractor
+
+# Description passed to the top-level argument parser
+DOC = """Apply Net - a tool to print / visualize DensePose results
+"""
+
+LOGGER_NAME = "apply_net"
+# Placeholder logger; main() rebinds this name to the logger returned by setup_logger
+logger = logging.getLogger(LOGGER_NAME)
+
+# Maps a command name (an action class's COMMAND) to the action class itself;
+# populated by the register_action decorator
+_ACTION_REGISTRY: Dict[str, "Action"] = {}
+
+
+class Action(object):
+    """
+    Base class for command-line actions; contributes the options shared by all of them
+    """
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        """
+        Register the common `-v` / `--verbosity` option on the given parser
+        """
+        parser.add_argument(
+            "-v",
+            "--verbosity",
+            # "count" stores the number of times the flag was given; no default is
+            # set here, so the attribute is None when the flag is absent
+            action="count",
+            help="Verbose mode. Multiple -v options increase the verbosity.",
+        )
+
+
+def register_action(cls: type):
+    """
+    Decorator for action classes to automate action registration
+    """
+    global _ACTION_REGISTRY
+    _ACTION_REGISTRY[cls.COMMAND] = cls
+    return cls
+
+
+class InferenceAction(Action):
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        super(InferenceAction, cls).add_arguments(parser)
+        parser.add_argument("cfg", metavar="<config>", help="Config file")
+        parser.add_argument("model", metavar="<model>", help="Model file")
+        parser.add_argument("input", metavar="<input>", help="Input data")
+        parser.add_argument(
+            "--opts",
+            help="Modify config options using the command-line 'KEY VALUE' pairs",
+            default=[],
+            nargs=argparse.REMAINDER,
+        )
+
+    @classmethod
+    def execute(cls: type, args: argparse.Namespace):
+        logger.info(f"Loading config from {args.cfg}")
+        opts = []
+        cfg = cls.setup_config(args.cfg, args.model, args, opts)
+        logger.info(f"Loading model from {args.model}")
+        predictor = DefaultPredictor(cfg)
+        logger.info(f"Loading data from {args.input}")
+        file_list = cls._get_input_file_list(args.input)
+        if len(file_list) == 0:
+            logger.warning(f"No input images for {args.input}")
+            return
+        context = cls.create_context(args)
+        for file_name in file_list:
+            img = read_image(file_name, format="BGR")  # predictor expects BGR image.
+            with torch.no_grad():
+                outputs = predictor(img)["instances"]
+                cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
+        cls.postexecute(context)
+
+    @classmethod
+    def setup_config(
+        cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
+    ):
+        cfg = get_cfg()
+        add_densepose_config(cfg)
+        cfg.merge_from_file(config_fpath)
+        cfg.merge_from_list(args.opts)
+        if opts:
+            cfg.merge_from_list(opts)
+        cfg.MODEL.WEIGHTS = model_fpath
+        cfg.freeze()
+        return cfg
+
+    @classmethod
+    def _get_input_file_list(cls: type, input_spec: str):
+        if os.path.isdir(input_spec):
+            file_list = [
+                os.path.join(input_spec, fname)
+                for fname in os.listdir(input_spec)
+                if os.path.isfile(os.path.join(input_spec, fname))
+            ]
+        elif os.path.isfile(input_spec):
+            file_list = [input_spec]
+        else:
+            file_list = glob.glob(input_spec)
+        return file_list
+
+
+@register_action
+class DumpAction(InferenceAction):
+    """
+    Dump action that outputs results to a pickle file
+    """
+
+    COMMAND: ClassVar[str] = "dump"
+
+    @classmethod
+    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+        parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.")
+        cls.add_arguments(parser)
+        parser.set_defaults(func=cls.execute)
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        super(DumpAction, cls).add_arguments(parser)
+        parser.add_argument(
+            "--output",
+            metavar="<dump_file>",
+            default="results.pkl",
+            help="File name to save dump to",
+        )
+
+    @classmethod
+    def execute_on_outputs(
+        cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
+    ):
+        image_fpath = entry["file_name"]
+        logger.info(f"Processing {image_fpath}")
+        result = {"file_name": image_fpath}
+        if outputs.has("scores"):
+            result["scores"] = outputs.get("scores").cpu()
+        if outputs.has("pred_boxes"):
+            result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
+            if outputs.has("pred_densepose"):
+                boxes_XYWH = BoxMode.convert(
+                    result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+                )
+                result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH)
+        context["results"].append(result)
+
+    @classmethod
+    def create_context(cls: type, args: argparse.Namespace):
+        context = {"results": [], "out_fname": args.output}
+        return context
+
+    @classmethod
+    def postexecute(cls: type, context: Dict[str, Any]):
+        out_fname = context["out_fname"]
+        out_dir = os.path.dirname(out_fname)
+        if len(out_dir) > 0 and not os.path.exists(out_dir):
+            os.makedirs(out_dir)
+        with open(out_fname, "wb") as hFile:
+            pickle.dump(context["results"], hFile)
+            logger.info(f"Output saved to {out_fname}")
+
+
+@register_action
+class ShowAction(InferenceAction):
+    """
+    Show action that visualizes selected entries on an image
+    """
+
+    COMMAND: ClassVar[str] = "show"
+    VISUALIZERS: ClassVar[Dict[str, object]] = {
+        "dp_contour": DensePoseResultsContourVisualizer,
+        "dp_segm": DensePoseResultsFineSegmentationVisualizer,
+        "dp_u": DensePoseResultsUVisualizer,
+        "dp_v": DensePoseResultsVVisualizer,
+        "bbox": ScoredBoundingBoxVisualizer,
+    }
+
+    @classmethod
+    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+        parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
+        cls.add_arguments(parser)
+        parser.set_defaults(func=cls.execute)
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        super(ShowAction, cls).add_arguments(parser)
+        parser.add_argument(
+            "visualizations",
+            metavar="<visualizations>",
+            help="Comma separated list of visualizations, possible values: "
+            "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
+        )
+        parser.add_argument(
+            "--min_score",
+            metavar="<score>",
+            default=0.8,
+            type=float,
+            help="Minimum detection score to visualize",
+        )
+        parser.add_argument(
+            "--nms_thresh", metavar="<threshold>", default=None, type=float, help="NMS threshold"
+        )
+        parser.add_argument(
+            "--output",
+            metavar="<image_file>",
+            default="outputres.png",
+            help="File name to save output to",
+        )
+
+    @classmethod
+    def setup_config(
+        cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
+    ):
+        opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST")
+        opts.append(str(args.min_score))
+        if args.nms_thresh is not None:
+            opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST")
+            opts.append(str(args.nms_thresh))
+        cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts)
+        return cfg
+
+    @classmethod
+    def execute_on_outputs(
+        cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
+    ):
+        import cv2
+        import numpy as np
+
+        visualizer = context["visualizer"]
+        extractor = context["extractor"]
+        image_fpath = entry["file_name"]
+        logger.info(f"Processing {image_fpath}")
+        image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY)
+        image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
+        data = extractor(outputs)
+        image_vis = visualizer.visualize(image, data)
+        entry_idx = context["entry_idx"] + 1
+        out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
+        out_dir = os.path.dirname(out_fname)
+        if len(out_dir) > 0 and not os.path.exists(out_dir):
+            os.makedirs(out_dir)
+        cv2.imwrite(out_fname, image_vis)
+        logger.info(f"Output saved to {out_fname}")
+        context["entry_idx"] += 1
+
+    @classmethod
+    def postexecute(cls: type, context: Dict[str, Any]):
+        pass
+
+    @classmethod
+    def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
+        base, ext = os.path.splitext(fname_base)
+        return base + ".{0:04d}".format(entry_idx) + ext
+
+    @classmethod
+    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+        vis_specs = args.visualizations.split(",")
+        visualizers = []
+        extractors = []
+        for vis_spec in vis_specs:
+            vis = cls.VISUALIZERS[vis_spec]()
+            visualizers.append(vis)
+            extractor = create_extractor(vis)
+            extractors.append(extractor)
+        visualizer = CompoundVisualizer(visualizers)
+        extractor = CompoundExtractor(extractors)
+        context = {
+            "extractor": extractor,
+            "visualizer": visualizer,
+            "out_fname": args.output,
+            "entry_idx": 0,
+        }
+        return context
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=DOC,
+        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
+    )
+    parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
+    subparsers = parser.add_subparsers(title="Actions")
+    for _, action in _ACTION_REGISTRY.items():
+        action.add_parser(subparsers)
+    return parser
+
+
+def main():
+    parser = create_argument_parser()
+    args = parser.parse_args()
+    verbosity = args.verbosity if hasattr(args, "verbosity") else None
+    global logger
+    logger = setup_logger(name=LOGGER_NAME)
+    logger.setLevel(verbosity_to_level(verbosity))
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
@@ -0,0 +1,47 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+
+  DENSEPOSE_ON: True
+  ROI_HEADS:
+    NAME: "DensePoseROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseV1ConvXHead"
+    POOLER_TYPE: "ROIAlign"
+    NUM_COARSE_SEGM_CHANNELS: 2
+DATASETS:
+  TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
+  TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+  WARMUP_FACTOR: 0.1
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "iid_iso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "indep_aniso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "iid_iso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "indep_aniso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
@@ -0,0 +1,17 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  RESNETS:
+    DEPTH: 101
+  ROI_DENSEPOSE_HEAD:
+    NUM_COARSE_SEGM_CHANNELS: 15
+    POOLER_RESOLUTION: 14
+    HEATMAP_SIZE: 56
+    INDEX_WEIGHTS: 2.0
+    PART_WEIGHTS: 0.3
+    POINT_REGRESSION_WEIGHTS: 0.1
+    DECODER_ON: False
+SOLVER:
+  BASE_LR: 0.002
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "iid_iso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "indep_aniso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "iid_iso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "indep_aniso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
@@ -0,0 +1,17 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    NUM_COARSE_SEGM_CHANNELS: 15
+    POOLER_RESOLUTION: 14
+    HEATMAP_SIZE: 56
+    INDEX_WEIGHTS: 2.0
+    PART_WEIGHTS: 0.3
+    POINT_REGRESSION_WEIGHTS: 0.1
+    DECODER_ON: False
+SOLVER:
+  BASE_LR: 0.002
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
@@ -0,0 +1,91 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("base_coco_2017_train",)
+  TEST: ("base_coco_2017_val", "densepose_chimps")
+  CATEGORY_MAPS:
+    "base_coco_2017_train":
+      "16": 1 # bird -> person
+      "17": 1 # cat -> person
+      "18": 1 # dog -> person
+      "19": 1 # horse -> person
+      "20": 1 # sheep -> person
+      "21": 1 # cow -> person
+      "22": 1 # elephant -> person
+      "23": 1 # bear -> person
+      "24": 1 # zebra -> person
+      "25": 1 # giraffe -> person
+    "base_coco_2017_val":
+      "16": 1 # bird -> person
+      "17": 1 # cat -> person
+      "18": 1 # dog -> person
+      "19": 1 # horse -> person
+      "20": 1 # sheep -> person
+      "21": 1 # cow -> person
+      "22": 1 # elephant -> person
+      "23": 1 # bear -> person
+      "24": 1 # zebra -> person
+      "25": 1 # giraffe -> person
+  WHITELISTED_CATEGORIES:
+    "base_coco_2017_train":
+      - 1  # person
+      - 16 # bird
+      - 17 # cat
+      - 18 # dog
+      - 19 # horse
+      - 20 # sheep
+      - 21 # cow
+      - 22 # elephant
+      - 23 # bear
+      - 24 # zebra
+      - 25 # giraffe
+    "base_coco_2017_val":
+      - 1  # person
+      - 16 # bird
+      - 17 # cat
+      - 18 # dog
+      - 19 # horse
+      - 20 # sheep
+      - 21 # cow
+      - 22 # elephant
+      - 23 # bear
+      - 24 # zebra
+      - 25 # giraffe
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
@@ -0,0 +1,7 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  DENSEPOSE_ON: False
+  RESNETS:
+    DEPTH: 50
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseDeepLabHead"
+DATASETS:
+  TRAIN: ("densepose_coco_2014_minival_100",)
+  TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+  MAX_ITER: 40
+  STEPS: (30,)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
+MODEL:
+  WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
+DATASETS:
+  TRAIN: ()
+  TEST: ("densepose_coco_2014_minival_100",)
+TEST:
+  AUG:
+    ENABLED: True
+    MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
+    MAX_SIZE: 4000
+    FLIP: True
+  EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP",  60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]]
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "iid_iso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+DATASETS:
+  TRAIN: ("densepose_coco_2014_minival_100",)
+  TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 40
+  STEPS: (30,)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+  ROI_DENSEPOSE_HEAD:
+    UV_CONFIDENCE:
+      ENABLED: True
+      TYPE: "indep_aniso"
+    POINT_REGRESSION_WEIGHTS: 0.0005
+DATASETS:
+  TRAIN: ("densepose_coco_2014_minival_100",)
+  TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+  CLIP_GRADIENTS:
+    ENABLED: True
+  MAX_ITER: 40 
+  STEPS: (30,)
+  WARMUP_FACTOR: 0.025
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
+MODEL:
+  WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
+DATASETS:
+  TRAIN: ()
+  TEST: ("densepose_coco_2014_minival_100",)
+TEST:
+  EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP",  60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+  TRAIN: ("densepose_coco_2014_minival_100",)
+  TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+  MAX_ITER: 40
+  STEPS: (30,)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,14 @@
+# Training accuracy test: trains the base DensePose R-CNN FPN config on
+# densepose_coco_2014_minival for 6000 iterations and tests on the same dataset.
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  ROI_HEADS:
+    NUM_CLASSES: 1
+DATASETS:
+  TRAIN: ("densepose_coco_2014_minival",)
+  TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+  MAX_ITER: 6000
+  STEPS: (5500, 5800)
+TEST:
+  # NOTE(review): each entry looks like [task, metric, expected value, tolerance] - confirm with the code that consumes EXPECTED_RESULTS.
+  EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]]
+
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/init.py
@@ -0,0 +1,9 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from .data.datasets import builtin  # just to register data
+from .config import add_densepose_config, add_dataset_category_config
+from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY
+from .evaluator import DensePoseCOCOEvaluator
+from .roi_head import DensePoseROIHeads
+from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA
+from .utils.transform import load_from_cfg
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from detectron2.config import CfgNode as CN
+
+
+def add_dataset_category_config(cfg: CN):
+    """
+    Register the category-related dataset options on `cfg`.
+
+    Two nodes are created under `cfg.DATASETS`, both with `new_allowed=True`:
+     - CATEGORY_MAPS: category mapping
+     - WHITELISTED_CATEGORIES: category whitelisting
+    """
+    datasets_cfg = cfg.DATASETS
+    datasets_cfg.CATEGORY_MAPS = CN(new_allowed=True)
+    datasets_cfg.WHITELISTED_CATEGORIES = CN(new_allowed=True)
+
+
+def add_densepose_config(cfg: CN):
+    """
+    Populate `cfg` with the default DensePose options.
+
+    Sets `MODEL.DENSEPOSE_ON` and creates the `MODEL.ROI_DENSEPOSE_HEAD` node
+    together with its `DEEPLAB` and `UV_CONFIDENCE` sub-nodes.
+    """
+    model = cfg.MODEL
+    model.DENSEPOSE_ON = True
+
+    model.ROI_DENSEPOSE_HEAD = CN()
+    # Re-read the node from the config so that the defaults below are written
+    # to the object that is actually stored there.
+    head = model.ROI_DENSEPOSE_HEAD
+    head.NAME = ""
+    head.NUM_STACKED_CONVS = 8
+    # Number of parts used for point labels
+    head.NUM_PATCHES = 24
+    head.DECONV_KERNEL = 4
+    head.CONV_HEAD_DIM = 512
+    head.CONV_HEAD_KERNEL = 3
+    head.UP_SCALE = 2
+    head.HEATMAP_SIZE = 112
+    head.POOLER_TYPE = "ROIAlignV2"
+    head.POOLER_RESOLUTION = 28
+    head.POOLER_SAMPLING_RATIO = 2
+    head.NUM_COARSE_SEGM_CHANNELS = 2  # 15 or 2
+    # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
+    head.FG_IOU_THRESHOLD = 0.7
+    # Loss weights for annotation masks (14 parts)
+    head.INDEX_WEIGHTS = 5.0
+    # Loss weights for surface parts (24 parts)
+    head.PART_WEIGHTS = 1.0
+    # Loss weights for UV regression
+    head.POINT_REGRESSION_WEIGHTS = 0.01
+
+    # Decoder options
+    head.DECODER_ON = True
+    head.DECODER_NUM_CLASSES = 256
+    head.DECODER_CONV_DIMS = 256
+    head.DECODER_NORM = ""
+    head.DECODER_COMMON_STRIDE = 4
+
+    # DeepLab head options
+    head.DEEPLAB = CN()
+    deeplab = head.DEEPLAB
+    deeplab.NORM = "GN"
+    deeplab.NONLOCAL_ON = 0
+
+    # Confidence options.
+    # ENABLED turns on learning confidences (variances) along with the actual values.
+    head.UV_CONFIDENCE = CN({"ENABLED": False})
+    uv_confidence = head.UV_CONFIDENCE
+    # UV confidence lower bound
+    uv_confidence.EPSILON = 0.01
+    # Statistical model type for confidence learning, possible values:
+    # - "iid_iso": statistically independent identically distributed residuals
+    #    with isotropic covariance
+    # - "indep_aniso": statistically independent residuals with anisotropic
+    #    covariances
+    uv_confidence.TYPE = "iid_iso"
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/init.py
@@ -0,0 +1,9 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .build import build_detection_test_loader, build_detection_train_loader
+from .dataset_mapper import DatasetMapper
+
+# ensure the builtin data are registered
+from . import datasets
+
+# Export every name defined in this module so far that does not start with "_".
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py
@@ -0,0 +1,405 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import itertools
+import logging
+import numpy as np
+import operator
+from typing import Any, Callable, Collection, Dict, Iterable, List, Optional
+import torch
+
+from detectron2.config import CfgNode
+from detectron2.data import samplers
+from detectron2.data.build import (
+    load_proposals_into_dataset,
+    print_instances_class_histogram,
+    trivial_batch_collator,
+    worker_init_reset_seed,
+)
+from detectron2.data.catalog import DatasetCatalog, MetadataCatalog
+from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset
+from detectron2.utils.comm import get_world_size
+
+from .dataset_mapper import DatasetMapper
+from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
+from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
+
+__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
+
+
+# A single dataset entry: a dictionary with string keys.
+Instance = Dict[str, Any]
+# A filter over dataset entries; entries for which it returns a true value are kept.
+InstancePredicate = Callable[[Instance], bool]
+
+
+def _compute_num_images_per_worker(cfg: CfgNode):
+    """
+    Split the global batch size evenly between workers.
+
+    Args:
+        cfg (CfgNode): the config; `SOLVER.IMS_PER_BATCH` is read from it
+
+    Returns:
+        `SOLVER.IMS_PER_BATCH` divided by the value of `get_world_size()`
+
+    Raises:
+        AssertionError: if the batch size is not divisible by the number of
+            workers or is smaller than it
+    """
+    world_size = get_world_size()
+    batch_size = cfg.SOLVER.IMS_PER_BATCH
+    assert batch_size % world_size == 0, (
+        "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
+            batch_size, world_size
+        )
+    )
+    assert batch_size >= world_size, (
+        "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
+            batch_size, world_size
+        )
+    )
+    return batch_size // world_size
+
+
+def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
+    """
+    Rewrite, in place, the `category_id` of every annotation using the
+    `thing_dataset_id_to_contiguous_id` map stored in the dataset metadata.
+
+    Args:
+        dataset_name (str): name of the dataset whose metadata holds the map
+        dataset_dicts (Iterable[Instance]): entries with an "annotations" list
+    """
+    meta = MetadataCatalog.get(dataset_name)
+    for record in dataset_dicts:
+        annotations = record["annotations"]
+        for annotation in annotations:
+            dataset_cat_id = annotation["category_id"]
+            annotation["category_id"] = meta.thing_dataset_id_to_contiguous_id[dataset_cat_id]
+
+
+def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
+    """
+    Build one contiguous category numbering shared by all given datasets and
+    store the resulting maps in the metadata of each dataset.
+
+    For every dataset the attributes `thing_classes`,
+    `thing_dataset_id_to_contiguous_id` and `thing_contiguous_id_to_dataset_id`
+    are set on its metadata.
+
+    Args:
+        dataset_names (Iterable[str]): dataset names; iterated twice
+
+    Raises:
+        ValueError: if two datasets give different names to the same category ID
+    """
+    # category ID -> (category name, dataset in which it was first seen)
+    merged_categories = {}
+    for dataset_name in dataset_names:
+        dataset_categories = MetadataCatalog.get(dataset_name).categories
+        for cat_id, cat_name in dataset_categories.items():
+            cat_name_other, dataset_name_other = merged_categories.setdefault(
+                cat_id, (cat_name, dataset_name)
+            )
+            if cat_name_other != cat_name:
+                raise ValueError(
+                    f"Incompatible categories for category ID {cat_id}: "
+                    f'dataset {dataset_name} value "{cat_name}", '
+                    f'dataset {dataset_name_other} value "{cat_name_other}"'
+                )
+
+    # contiguous IDs follow the sorted order of the merged category IDs
+    merged_cat_id_to_cont_id = {
+        cat_id: index for index, cat_id in enumerate(sorted(merged_categories))
+    }
+
+    # publish per-dataset views of the merged numbering
+    for dataset_name in dataset_names:
+        meta = MetadataCatalog.get(dataset_name)
+        categories = meta.get("categories")
+        ordered_cat_ids = sorted(categories.keys())
+        meta.thing_classes = [categories[cat_id] for cat_id in ordered_cat_ids]
+        meta.thing_dataset_id_to_contiguous_id = {
+            cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in ordered_cat_ids
+        }
+        meta.thing_contiguous_id_to_dataset_id = {
+            merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in ordered_cat_ids
+        }
+
+
+def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+    """
+    Create the predicate that drops dataset entries without usable annotations.
+
+    An entry is kept if it has an "annotations" key and at least one of its
+    annotations is not a crowd annotation. Entries with an empty annotation
+    list are therefore dropped as well.
+
+    Args:
+        cfg (CfgNode): the config; `DATALOADER.FILTER_EMPTY_ANNOTATIONS` is read
+
+    Returns:
+        the predicate, or None if `DATALOADER.FILTER_EMPTY_ANNOTATIONS` is off
+    """
+    if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
+        return None
+
+    def has_annotations(instance: Instance) -> bool:
+        return "annotations" in instance
+
+    def has_only_crowd_annotations(instance: Instance) -> bool:
+        # The crowd flag is stored under "iscrowd" (the key DatasetMapper reads
+        # as well). The previous lookup of "is_crowd" never matched, so
+        # crowd-only entries were never filtered out.
+        return all(ann.get("iscrowd", 0) != 0 for ann in instance["annotations"])
+
+    def general_keep_instance_predicate(instance: Instance) -> bool:
+        return has_annotations(instance) and not has_only_crowd_annotations(instance)
+
+    return general_keep_instance_predicate
+
+
+def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+    """
+    Create the predicate that keeps entries with enough annotated keypoints.
+
+    The count is taken over every third value, starting at index 2, of each
+    annotation's "keypoints" list (presumably the visibility flag of each
+    keypoint - confirm against the dataset format); values > 0 are counted.
+
+    Args:
+        cfg (CfgNode): the config; `MODEL.KEYPOINT_ON` and
+            `MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE` are read
+
+    Returns:
+        the predicate, or None if keypoints are off or the minimum is not positive
+    """
+    min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
+    if not (cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0)):
+        return None
+
+    def has_sufficient_num_keypoints(instance: Instance) -> bool:
+        num_kpts = 0
+        for ann in instance["annotations"]:
+            if "keypoints" not in ann:
+                continue
+            flags = np.array(ann["keypoints"][2::3])
+            num_kpts = num_kpts + (flags > 0).sum()
+        return num_kpts >= min_num_keypoints
+
+    return has_sufficient_num_keypoints
+
+
+def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+    """
+    Create the predicate that keeps entries with at least one annotation
+    containing a "segmentation" key.
+
+    Returns:
+        the predicate, or None if `MODEL.MASK_ON` is off
+    """
+    if cfg.MODEL.MASK_ON:
+
+        def has_mask_annotations(instance: Instance) -> bool:
+            for ann in instance["annotations"]:
+                if "segmentation" in ann:
+                    return True
+            return False
+
+        return has_mask_annotations
+    return None
+
+
+def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+    """
+    Create the predicate that keeps entries with at least one DensePose annotation.
+
+    An annotation qualifies if it contains all keys listed in
+    DENSEPOSE_COCO_KEYS_WITHOUT_MASK and, in addition, either the
+    DENSEPOSE_COCO_MASK_KEY key or a "segmentation" key.
+
+    Returns:
+        the predicate, or None if `MODEL.DENSEPOSE_ON` is off
+    """
+    if not cfg.MODEL.DENSEPOSE_ON:
+        return None
+
+    def has_densepose_annotations(instance: Instance) -> bool:
+        def is_densepose_annotation(ann) -> bool:
+            for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK:
+                if key not in ann:
+                    return False
+            return (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
+
+        return any(is_densepose_annotation(ann) for ann in instance["annotations"])
+
+    return has_densepose_annotations
+
+
+def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+    """
+    Combine the task-specific predicates (keypoints, masks, DensePose).
+
+    Returns:
+        a predicate that accepts an entry if any of the enabled task-specific
+        predicates accepts it, or None if none of them is enabled
+    """
+    predicates = []
+    for creator in (
+        _maybe_create_keypoints_keep_instance_predicate,
+        _maybe_create_mask_keep_instance_predicate,
+        _maybe_create_densepose_keep_instance_predicate,
+    ):
+        predicate = creator(cfg)
+        if predicate is not None:
+            predicates.append(predicate)
+    if not predicates:
+        return None
+
+    def combined_predicate(instance: Instance) -> bool:
+        for predicate in predicates:
+            if predicate(instance):
+                return True
+        return False
+
+    return combined_predicate
+
+
+def _get_train_keep_instance_predicate(cfg: CfgNode):
+    """
+    Build the predicate used to filter training entries.
+
+    Returns:
+        the conjunction of the general and the task-specific predicate when both
+        exist, whichever one exists otherwise, or None if neither exists
+    """
+    general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
+    combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
+
+    if general_keep_predicate is None:
+        # also covers the case where both are missing (returns None)
+        return combined_specific_keep_predicate
+    if combined_specific_keep_predicate is None:
+        return general_keep_predicate
+
+    def combined_general_specific_keep_predicate(instance: Instance) -> bool:
+        return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
+
+    return combined_general_specific_keep_predicate
+
+
+def _get_test_keep_instance_predicate(cfg: CfgNode):
+    """
+    Build the predicate used to filter test entries: only the general
+    predicate is applied (it may be None).
+    """
+    return _maybe_create_general_keep_instance_predicate(cfg)
+
+
+def _maybe_filter_and_map_categories(
+    dataset_name: str, dataset_dicts: List[Instance]
+) -> List[Instance]:
+    """
+    Apply the category whitelist and the category map stored in the dataset
+    metadata (`whitelisted_categories`, `category_map`) to the annotations.
+
+    Annotations whose category is not whitelisted are removed; the remaining
+    ones get their `category_id` remapped. Entries are modified in place.
+
+    Args:
+        dataset_name (str): name of the dataset whose metadata is consulted
+        dataset_dicts (List[Instance]): entries with an "annotations" list
+
+    Returns:
+        `dataset_dicts` itself if neither a whitelist nor a map is set,
+        otherwise a new list holding the same (modified) entries
+    """
+    meta = MetadataCatalog.get(dataset_name)
+    whitelisted_categories = meta.get("whitelisted_categories")
+    category_map = meta.get("category_map", {})
+    if whitelisted_categories is None and not category_map:
+        return dataset_dicts
+
+    def is_whitelisted(cat_id) -> bool:
+        return whitelisted_categories is None or cat_id in whitelisted_categories
+
+    processed_dicts = []
+    for record in dataset_dicts:
+        kept_annotations = []
+        for ann in record["annotations"]:
+            cat_id = ann["category_id"]
+            if is_whitelisted(cat_id):
+                ann["category_id"] = category_map.get(cat_id, cat_id)
+                kept_annotations.append(ann)
+        record["annotations"] = kept_annotations
+        processed_dicts.append(record)
+    return processed_dicts
+
+
+def _add_category_whitelists_to_metadata(cfg: CfgNode):
+    """
+    Copy the per-dataset category whitelists from
+    `cfg.DATASETS.WHITELISTED_CATEGORIES` to the `whitelisted_categories`
+    attribute of the corresponding dataset metadata and log each of them.
+    """
+    logger = logging.getLogger(__name__)
+    whitelists = cfg.DATASETS.WHITELISTED_CATEGORIES
+    for dataset_name, whitelisted_cat_ids in whitelists.items():
+        meta = MetadataCatalog.get(dataset_name)
+        meta.whitelisted_categories = whitelisted_cat_ids
+        message = "Whitelisted categories for dataset {}: {}".format(
+            dataset_name, meta.whitelisted_categories
+        )
+        logger.info(message)
+
+
+def _add_category_maps_to_metadata(cfg: CfgNode):
+    """
+    Copy the per-dataset category maps from `cfg.DATASETS.CATEGORY_MAPS` to the
+    `category_map` attribute of the corresponding dataset metadata and log
+    each of them. Source and destination IDs are converted to `int`.
+    """
+    logger = logging.getLogger(__name__)
+    for dataset_name, raw_category_map in cfg.DATASETS.CATEGORY_MAPS.items():
+        category_map = {}
+        for cat_id_src, cat_id_dst in raw_category_map.items():
+            src_id = int(cat_id_src)
+            category_map[src_id] = int(cat_id_dst)
+        meta = MetadataCatalog.get(dataset_name)
+        meta.category_map = category_map
+        message = "Category maps for dataset {}: {}".format(dataset_name, meta.category_map)
+        logger.info(message)
+
+
+def combine_detection_dataset_dicts(
+    dataset_names: Collection[str],
+    keep_instance_predicate: Optional[InstancePredicate] = None,
+    proposal_files: Optional[Collection[str]] = None,
+) -> List[Instance]:
+    """
+    Load the given datasets, unify their categories and merge them into one list.
+
+    Args:
+        dataset_names (Collection[str]): a list of dataset names
+        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
+            applied to instance dicts which defines whether to keep the instance
+        proposal_files (Collection[str]): if given, a list of object proposal files
+            that match each dataset in `dataset_names`.
+
+    Returns:
+        a flat list with the entries of all datasets, in the order of
+        `dataset_names`, restricted to those accepted by the predicate
+    """
+    assert len(dataset_names)
+    if proposal_files is None:
+        proposal_files = [None] * len(dataset_names)
+    assert len(dataset_names) == len(proposal_files)
+
+    # load annotations and dataset metadata
+    dataset_map = {
+        dataset_name: DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
+    }
+    # initialize category maps
+    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
+
+    # apply category maps dataset by dataset
+    per_dataset_dicts = []
+    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
+        dataset_dicts = dataset_map[dataset_name]
+        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
+        if proposal_file is not None:
+            dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
+        dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
+        _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
+        thing_classes = MetadataCatalog.get(dataset_name).thing_classes
+        print_instances_class_histogram(dataset_dicts, thing_classes)
+        per_dataset_dicts.append(dataset_dicts)
+
+    flattened = itertools.chain.from_iterable(per_dataset_dicts)
+    if keep_instance_predicate is None:
+        return list(flattened)
+    return [d for d in flattened if keep_instance_predicate(d)]
+
+
+def build_detection_train_loader(cfg: CfgNode, mapper=None):
+    """
+    A data loader is created in a way similar to that of Detectron2.
+    The main differences are:
+     - it allows to combine data with different but compatible object category sets
+
+    The data loader is created by the following steps:
+    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
+    2. Start workers to work on the dicts. Each worker will:
+        * Map each metadata dict into another format to be consumed by the model.
+        * Batch them by simply putting dicts into a list.
+    The batched ``list[mapped_dict]`` is what this dataloader will return.
+
+    Args:
+        cfg (CfgNode): the config
+        mapper (callable): a callable which takes a sample (dict) from dataset and
+            returns the format to be consumed by the model.
+            By default it will be `DatasetMapper(cfg, True)`.
+
+    Returns:
+        an infinite iterator of training data
+
+    Raises:
+        ValueError: if `cfg.DATALOADER.SAMPLER_TRAIN` names an unknown sampler
+    """
+    images_per_worker = _compute_num_images_per_worker(cfg)
+
+    # Publish whitelists and category maps from the config to the dataset
+    # metadata first: the dataset combination step below reads them from there.
+    _add_category_whitelists_to_metadata(cfg)
+    _add_category_maps_to_metadata(cfg)
+    dataset_dicts = combine_detection_dataset_dicts(
+        cfg.DATASETS.TRAIN,
+        keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
+        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
+    )
+    dataset = DatasetFromList(dataset_dicts, copy=False)
+
+    if mapper is None:
+        mapper = DatasetMapper(cfg, True)
+    dataset = MapDataset(dataset, mapper)
+
+    # Select the sampler by name; only the two samplers below are supported.
+    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
+    logger = logging.getLogger(__name__)
+    logger.info("Using training sampler {}".format(sampler_name))
+    if sampler_name == "TrainingSampler":
+        sampler = samplers.TrainingSampler(len(dataset))
+    elif sampler_name == "RepeatFactorTrainingSampler":
+        sampler = samplers.RepeatFactorTrainingSampler(
+            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
+        )
+    else:
+        raise ValueError("Unknown training sampler: {}".format(sampler_name))
+
+    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
+        # The torch loader yields single elements here; batches of
+        # `images_per_worker` are formed by AspectRatioGroupedDataset.
+        data_loader = torch.utils.data.DataLoader(
+            dataset,
+            sampler=sampler,
+            num_workers=cfg.DATALOADER.NUM_WORKERS,
+            batch_sampler=None,
+            collate_fn=operator.itemgetter(0),  # don't batch, but yield individual elements
+            worker_init_fn=worker_init_reset_seed,
+        )  # yield individual mapped dict
+        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
+    else:
+        batch_sampler = torch.utils.data.sampler.BatchSampler(
+            sampler, images_per_worker, drop_last=True
+        )
+        # drop_last so the batch always have the same size
+        data_loader = torch.utils.data.DataLoader(
+            dataset,
+            num_workers=cfg.DATALOADER.NUM_WORKERS,
+            batch_sampler=batch_sampler,
+            collate_fn=trivial_batch_collator,
+            worker_init_fn=worker_init_reset_seed,
+        )
+
+    return data_loader
+
+
+def build_detection_test_loader(cfg, dataset_name, mapper=None):
+    """
+    Similar to `build_detection_train_loader`.
+    But this function uses the given `dataset_name` argument (instead of the names in cfg),
+    and uses batch size 1.
+
+    Args:
+        cfg: a detectron2 CfgNode
+        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
+        mapper (callable): a callable which takes a sample (dict) from dataset
+            and returns the format to be consumed by the model.
+            By default it will be `DatasetMapper(cfg, False)`.
+
+    Returns:
+        DataLoader: a torch DataLoader, that loads the given detection
+            dataset, with test-time transformation and batching.
+    """
+    # Publish whitelists and category maps from the config to the dataset
+    # metadata first: the dataset combination step below reads them from there.
+    _add_category_whitelists_to_metadata(cfg)
+    _add_category_maps_to_metadata(cfg)
+    # When proposals are loaded, the proposal file is the one at the position
+    # of `dataset_name` within cfg.DATASETS.TEST, so the name must be listed there.
+    # NOTE(review): the default DatasetMapper asserts that MODEL.LOAD_PROPOSALS
+    # is off, so that case appears to need a custom `mapper` - confirm.
+    dataset_dicts = combine_detection_dataset_dicts(
+        [dataset_name],
+        keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
+        proposal_files=[
+            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
+        ]
+        if cfg.MODEL.LOAD_PROPOSALS
+        else None,
+    )
+
+    dataset = DatasetFromList(dataset_dicts)
+    if mapper is None:
+        mapper = DatasetMapper(cfg, False)
+    dataset = MapDataset(dataset, mapper)
+
+    sampler = samplers.InferenceSampler(len(dataset))
+    # Always use 1 image per worker during inference since this is the
+    # standard when reporting inference time in papers.
+    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
+
+    data_loader = torch.utils.data.DataLoader(
+        dataset,
+        num_workers=cfg.DATALOADER.NUM_WORKERS,
+        batch_sampler=batch_sampler,
+        collate_fn=trivial_batch_collator,
+    )
+    return data_loader
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import copy
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2.data import MetadataCatalog
+from detectron2.data import detection_utils as utils
+from detectron2.data import transforms as T
+
+from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+
+
+class DatasetMapper:
+    """
+    A customized version of `detectron2.data.DatasetMapper`
+
+    Reads the image of a dataset entry, applies the configured transforms and,
+    in training mode, converts the annotations (including DensePose data, if
+    enabled) into an `instances` field.
+    """
+
+    def __init__(self, cfg, is_train=True):
+        """
+        Args:
+            cfg: the config; loading proposals (`MODEL.LOAD_PROPOSALS`) is not supported
+            is_train (bool): whether the mapper is used for training; in test
+                mode annotations are dropped by `__call__`
+        """
+        self.tfm_gens = utils.build_transform_gen(cfg, is_train)
+
+        # fmt: off
+        self.img_format     = cfg.INPUT.FORMAT
+        self.mask_on        = cfg.MODEL.MASK_ON
+        self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
+        self.densepose_on   = cfg.MODEL.DENSEPOSE_ON
+        assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
+        # fmt: on
+        if self.keypoint_on and is_train:
+            # Flip only makes sense in training
+            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
+        else:
+            self.keypoint_hflip_indices = None
+
+        if self.densepose_on:
+            # Collect the transform data source of every train and test dataset;
+            # only the first one is actually loaded (see the TODO below).
+            densepose_transform_srcs = [
+                MetadataCatalog.get(ds).densepose_transform_src
+                for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
+            ]
+            assert len(densepose_transform_srcs) > 0
+            # TODO: check that DensePose transformation data is the same for
+            # all the data. Otherwise one would have to pass DB ID with
+            # each entry to select proper transformation data. For now, since
+            # all DensePose annotated data uses the same data semantics, we
+            # omit this check.
+            densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
+            self.densepose_transform_data = DensePoseTransformData.load(
+                densepose_transform_data_fpath
+            )
+
+        self.is_train = is_train
+
+    def __call__(self, dataset_dict):
+        """
+        Args:
+            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+        Returns:
+            dict: a format that builtin models in detectron2 accept
+        """
+        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
+        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+        utils.check_image_size(dataset_dict, image)
+
+        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
+        image_shape = image.shape[:2]  # h, w
+        # Reorder the axes from (h, w, c) to (c, h, w) and store as float32 tensor
+        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
+
+        if not self.is_train:
+            # Annotations are not passed on in test mode
+            dataset_dict.pop("annotations", None)
+            return dataset_dict
+
+        # Drop annotation fields of tasks that are switched off
+        for anno in dataset_dict["annotations"]:
+            if not self.mask_on:
+                anno.pop("segmentation", None)
+            if not self.keypoint_on:
+                anno.pop("keypoints", None)
+
+        # USER: Implement additional transformations if you have other types of data
+        # USER: Don't call transpose_densepose if you don't need
+        # Crowd annotations (iscrowd != 0) are skipped.
+        annos = [
+            self._transform_densepose(
+                utils.transform_instance_annotations(
+                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
+                ),
+                transforms,
+            )
+            for obj in dataset_dict.pop("annotations")
+            if obj.get("iscrowd", 0) == 0
+        ]
+        instances = utils.annotations_to_instances(annos, image_shape)
+
+        # The "densepose" key is set on every annotation by _transform_densepose
+        # when DensePose is on, so checking the first annotation is sufficient.
+        if len(annos) and "densepose" in annos[0]:
+            gt_densepose = [obj["densepose"] for obj in annos]
+            instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)
+
+        # Keep only the instances whose boxes are non-empty
+        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
+        return dataset_dict
+
+    def _transform_densepose(self, annotation, transforms):
+        """
+        Convert the DensePose data of one annotation and apply `transforms` to it.
+
+        Args:
+            annotation (dict): a single instance annotation; modified in place
+            transforms: the transforms that were applied to the image
+
+        Returns:
+            dict: the same annotation; if DensePose is on, its "densepose" key
+                holds a `DensePoseDataRelative` object, or None when the
+                annotation has no valid DensePose data
+        """
+        if not self.densepose_on:
+            return annotation
+
+        # Handle densepose annotations
+        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
+        if is_valid:
+            densepose_data = DensePoseDataRelative(annotation, cleanup=True)
+            densepose_data.apply_transform(transforms, self.densepose_transform_data)
+            annotation["densepose"] = densepose_data
+        else:
+            # logger = logging.getLogger(__name__)
+            # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
+            DensePoseDataRelative.cleanup_annotation(annotation)
+            # NOTE: annotations for certain instances may be unavailable.
+            # 'None' is accepted by the DensePoseList data structure.
+            annotation["densepose"] = None
+        return annotation
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/init.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/init.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from . import builtin  # ensure the builtin data are registered
+
+# Export the public names of this module, leaving out anything that contains
+# "builtin" or starts with "_".
+__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py
@@ -0,0 +1,10 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from .coco import BASE_DATASETS as BASE_COCO_DATASETS
+from .coco import DATASETS as COCO_DATASETS
+from .coco import register_datasets as register_coco_datasets
+
+# Root passed to the dataset registration calls below.
+# NOTE(review): presumably the directory that dataset paths are resolved against - confirm in register_datasets.
+DEFAULT_DATASETS_ROOT = "data"
+
+
+# Register both dataset lists at import time.
+register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
+register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py
@@ -0,0 +1,314 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import contextlib
+import io
+import logging
+import os
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional
+from fvcore.common.file_io import PathManager
+from fvcore.common.timer import Timer
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+
+# Annotation key under which the DensePose masks are stored
+DENSEPOSE_MASK_KEY = "dp_masks"
+# DensePose annotation keys other than the mask key
+DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
+# All DensePose annotation keys; these are the ones `_maybe_add_densepose` copies
+DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
+# Base location handed to `get_metadata` when a dataset is registered
+DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
+
+
+@dataclass
+class CocoDatasetInfo:
+    """
+    Description of a COCO-format dataset to register: the catalog name plus
+    the locations of its images and of its annotations file.
+    """
+
+    # Name under which the dataset is registered in the catalogs
+    name: str
+    # Folder with the images; relative local paths get the datasets root prepended
+    images_root: str
+    # Annotations JSON file; relative local paths get the datasets root prepended
+    annotations_fpath: str
+
+
+# Datasets whose annotation files are the `densepose_*.json` ones.
+# All paths are relative and are resolved against the datasets root
+# at registration time.
+DATASETS = [
+    CocoDatasetInfo(
+        name="densepose_coco_2014_train",
+        images_root="coco/train2014",
+        annotations_fpath="coco/annotations/densepose_train2014.json",
+    ),
+    CocoDatasetInfo(
+        name="densepose_coco_2014_minival",
+        images_root="coco/val2014",
+        annotations_fpath="coco/annotations/densepose_minival2014.json",
+    ),
+    CocoDatasetInfo(
+        name="densepose_coco_2014_minival_100",
+        images_root="coco/val2014",
+        annotations_fpath="coco/annotations/densepose_minival2014_100.json",
+    ),
+    CocoDatasetInfo(
+        name="densepose_coco_2014_valminusminival",
+        images_root="coco/val2014",
+        annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
+    ),
+    CocoDatasetInfo(
+        name="densepose_chimps",
+        images_root="densepose_evolution/densepose_chimps",
+        annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
+    ),
+]
+
+
+# COCO 2017 datasets that use the plain `instances_*.json` annotation files.
+# All paths are relative and are resolved against the datasets root
+# at registration time.
+BASE_DATASETS = [
+    CocoDatasetInfo(
+        name="base_coco_2017_train",
+        images_root="coco/train2017",
+        annotations_fpath="coco/annotations/instances_train2017.json",
+    ),
+    CocoDatasetInfo(
+        name="base_coco_2017_val",
+        images_root="coco/val2017",
+        annotations_fpath="coco/annotations/instances_val2017.json",
+    ),
+    CocoDatasetInfo(
+        name="base_coco_2017_val_100",
+        images_root="coco/val2017",
+        annotations_fpath="coco/annotations/instances_val2017_100.json",
+    ),
+]
+
+
+def _is_relative_local_path(path: os.PathLike):
+    """
+    Tell whether `path` is a relative path without a "://" scheme separator.
+
+    Returns:
+        bool: False for anything containing "://" and for absolute paths,
+        True otherwise
+    """
+    decoded = os.fsdecode(path)
+    if "://" in decoded:
+        return False
+    return not os.path.isabs(path)
+
+
+def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
+    """
+    Join `base_path` and `path` when both conditions hold:
+    1) base path is not None;
+    2) path is a relative local path (no "://", not absolute).
+    In every other case `path` is returned unchanged.
+    """
+    should_prepend = base_path is not None and _is_relative_local_path(path)
+    return os.path.join(base_path, path) if should_prepend else path
+
+
+def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
+    """
+    Builds the metadata dictionary associated with COCO DensePose data
+
+    Args:
+    base_path: Optional[os.PathLike]
+        Base path prepended to each metadata file name (skipped when None)
+
+    Returns:
+    Dict[str, Any]
+        Mapping from metadata key to the location of the corresponding file
+    """
+    file_names = {
+        "densepose_transform_src": "UV_symmetry_transforms.mat",
+        "densepose_smpl_subdiv": "SMPL_subdiv.mat",
+        "densepose_smpl_subdiv_transform": "SMPL_SUBDIV_TRANSFORM.mat",
+    }
+    return {
+        key: _maybe_prepend_base_path(base_path, file_name)
+        for key, file_name in file_names.items()
+    }
+
+
+def _load_coco_annotations(json_file: str):
+    """
+    Read COCO annotations from a JSON file
+
+    Args:
+        json_file: str
+            Path to the file to load annotations from
+    Returns:
+        Instance of `pycocotools.coco.COCO` built from that file
+    """
+    from pycocotools.coco import COCO
+
+    stopwatch = Timer()
+    # Anything the COCO constructor writes to stdout is captured and dropped
+    with contextlib.redirect_stdout(io.StringIO()):
+        api = COCO(json_file)
+    # Only loads slower than one second are worth a log line
+    if stopwatch.seconds() > 1:
+        message = "Loading {} takes {:.2f} seconds.".format(json_file, stopwatch.seconds())
+        logging.getLogger(__name__).info(message)
+    return api
+
+
+def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]):
+    """
+    Store the category id -> name mapping in the dataset metadata and log
+    the categories.
+
+    Args:
+    dataset_name: str
+        Name of the dataset whose metadata entry is updated
+    categories: List[Dict[str, Any]]
+        Category records; each one must provide the "id" and "name" keys
+    """
+    meta = MetadataCatalog.get(dataset_name)
+    meta.categories = {c["id"]: c["name"] for c in categories}
+    logger = logging.getLogger(__name__)
+    logger.info("Dataset {} categories: {}".format(dataset_name, categories))
+
+
+def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
+    """
+    Assert that no annotation id occurs twice across all images.
+
+    Files whose path contains "minival" are exempt: COCO2014 minival and
+    valminusminival carry a tiny share of duplicated ids that does not
+    affect accuracy, so they are explicitly white-listed.
+    """
+    if "minival" not in json_file:
+        seen_ids = [ann["id"] for per_image in anns for ann in per_image]
+        assert len(seen_ids) == len(
+            set(seen_ids)
+        ), "Annotation ids in '{}' are not unique!".format(json_file)
+
+
+def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+    """
+    Copy the bounding box of `ann_dict`, if any, into `obj` and tag it with
+    the XYWH_ABS box mode.
+    """
+    if "bbox" in ann_dict:
+        obj.update(bbox=ann_dict["bbox"], bbox_mode=BoxMode.XYWH_ABS)
+
+
+def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+    """
+    Copy the segmentation of `ann_dict`, if any, into `obj`.
+
+    A dict-valued segmentation is passed through untouched. Anything else is
+    treated as a list of polygons: polygons with an odd number of values or
+    with fewer than 3 points are dropped, and nothing is stored when no
+    polygon survives.
+    """
+    if "segmentation" in ann_dict:
+        raw = ann_dict["segmentation"]
+        if isinstance(raw, dict):
+            obj["segmentation"] = raw
+        else:
+            polygons = list(filter(lambda p: len(p) % 2 == 0 and len(p) >= 6, raw))
+            if polygons:
+                obj["segmentation"] = polygons
+
+
+def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+    """
+    Copy the keypoints of `ann_dict`, if any, into `obj`, shifting the
+    coordinates by half a pixel.
+
+    Keypoints are a flat list of (x, y, v) triplets. COCO's segmentation
+    coordinates are floating points in [0, H or W], but keypoint coordinates
+    are integers in [0, H-1 or W-1]. Therefore we assume the coordinates are
+    "pixel indices" and add 0.5 to convert to floating point coordinates.
+
+    The shifted values go into a new list, so `ann_dict` is left untouched
+    and processing the same annotation twice does not shift it twice.
+    """
+    if "keypoints" not in ann_dict:
+        return
+    obj["keypoints"] = [
+        # every third value is not a coordinate and is kept as is
+        v if idx % 3 == 2 else v + 0.5
+        for idx, v in enumerate(ann_dict["keypoints"])
+    ]
+
+
+def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+    """
+    Copy every DensePose entry (see DENSEPOSE_KEYS) present in `ann_dict`
+    into `obj`.
+    """
+    obj.update({key: ann_dict[key] for key in DENSEPOSE_KEYS if key in ann_dict})
+
+
+def _combine_images_with_annotations(
+    dataset_name: str,
+    image_root: str,
+    img_datas: Iterable[Dict[str, Any]],
+    ann_datas: Iterable[Iterable[Dict[str, Any]]],
+):
+    """
+    Build one record per image out of image dicts and the matching
+    annotation lists.
+
+    Args:
+    dataset_name: str
+        Stored in every record under the "dataset" key
+    image_root: str
+        Folder that image file names are joined with
+    img_datas: Iterable[Dict[str, Any]]
+        Image dicts providing "file_name", "height", "width" and "id"
+    ann_datas: Iterable[Iterable[Dict[str, Any]]]
+        Annotation dicts per image, in the same order as `img_datas`
+
+    Returns:
+        List of record dicts, each with an "annotations" list
+    """
+    copied_keys = ("iscrowd", "category_id")
+    records = []
+    for image_info, image_anns in zip(img_datas, ann_datas):
+        record = {
+            "file_name": os.path.join(image_root, image_info["file_name"]),
+            "height": image_info["height"],
+            "width": image_info["width"],
+            "image_id": image_info["id"],
+            "dataset": dataset_name,
+        }
+        instances = []
+        for ann in image_anns:
+            # every annotation must belong to this image and must not be ignored
+            assert ann["image_id"] == record["image_id"]
+            assert ann.get("ignore", 0) == 0
+            instance = {key: ann[key] for key in copied_keys if key in ann}
+            for add_field in (
+                _maybe_add_bbox,
+                _maybe_add_segm,
+                _maybe_add_keypoints,
+                _maybe_add_densepose,
+            ):
+                add_field(instance, ann)
+            instances.append(instance)
+        record["annotations"] = instances
+        records.append(record)
+    return records
+
+
+def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
+    """
+    Loads a JSON file with annotations in COCO instances format.
+    Used in place of detectron2's stock `load_coco_json` to handle metadata
+    in a more flexible way: category mapping is postponed to a later stage
+    so that several datasets with different (but coherent) sets of
+    categories can be combined.
+
+    Args:
+
+    annotations_json_file: str
+        Path to the JSON file with annotations in COCO instances format.
+    image_root: str
+        directory that contains all the images
+    dataset_name: str
+        the name that identifies a dataset, e.g. "densepose_coco_2014_train"
+
+    Returns:
+        List of per-image record dicts, ordered by image id
+    """
+    local_json_path = PathManager.get_local_path(annotations_json_file)
+    coco_api = _load_coco_annotations(local_json_path)
+    categories = coco_api.loadCats(coco_api.getCatIds())
+    _add_categories_metadata(dataset_name, categories)
+    # sorted image ids keep the order of records reproducible
+    image_ids = sorted(coco_api.imgs.keys())
+    # one dict per image, e.g.
+    # {'license': 4,
+    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+    #  'file_name': 'COCO_val2014_000000001268.jpg',
+    #  'height': 427,
+    #  'width': 640,
+    #  'date_captured': '2013-11-17 05:57:24',
+    #  'id': 1268}
+    images = coco_api.loadImgs(image_ids)
+    logging.getLogger(__name__).info(
+        "Loaded {} images in COCO format from {}".format(len(images), annotations_json_file)
+    )
+    # list[list[dict]]: the outer list runs over images (same order as
+    # `images`), the inner one over the annotated objects of that image
+    per_image_anns = [coco_api.imgToAnns[image_id] for image_id in image_ids]
+    _verify_annotations_have_unique_ids(annotations_json_file, per_image_anns)
+    return _combine_images_with_annotations(dataset_name, image_root, images, per_image_anns)
+
+
+def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
+    """
+    Registers a single COCO DensePose dataset in the dataset and metadata
+    catalogs
+
+    Args:
+    dataset_data: CocoDatasetInfo
+        Name and locations of the dataset to register
+    datasets_root: Optional[os.PathLike]
+        Datasets root folder prepended to relative locations (default: None)
+    """
+    name = dataset_data.name
+    images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root)
+    annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
+
+    def _load_records():
+        # Runs only when the catalog is asked for the dataset
+        return load_coco_json(
+            annotations_json_file=annotations_fpath,
+            image_root=images_root,
+            dataset_name=name,
+        )
+
+    DatasetCatalog.register(name, _load_records)
+    catalog_entry = MetadataCatalog.get(name)
+    catalog_entry.set(
+        json_file=annotations_fpath,
+        image_root=images_root,
+        **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
+    )
+
+
+def register_datasets(
+    datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
+):
+    """
+    Registers every provided COCO DensePose dataset, one after another
+
+    Args:
+    datasets_data: Iterable[CocoDatasetInfo]
+        Descriptions of the datasets to register
+    datasets_root: Optional[os.PathLike]
+        Datasets root folder (default: None)
+    """
+    for dataset_info in datasets_data:
+        register_dataset(dataset_info, datasets_root)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py
@@ -0,0 +1,579 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import base64
+import numpy as np
+from io import BytesIO
+import torch
+from PIL import Image
+from torch.nn import functional as F
+
+
+class DensePoseTransformData(object):
+    """
+    Lookup data needed to keep DensePose labels consistent under a
+    horizontal flip: label permutations for masks and points plus the
+    U/V symmetry tables loaded from a .mat file.
+    """
+
+    # Horizontal symmetry label transforms used for horizontal flip
+    MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
+    # fmt: off
+    POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23]  # noqa
+    # fmt: on
+
+    def __init__(self, uv_symmetries):
+        """
+        Args:
+            uv_symmetries: mapping with the "U_transforms" and "V_transforms"
+                lookup tensors (see `load`)
+        """
+        self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
+        self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
+        self.uv_symmetries = uv_symmetries
+
+    @staticmethod
+    def load(fpath):
+        """
+        Load the U/V symmetry tables from a .mat file.
+
+        Each of the "U_transforms" and "V_transforms" entries is read as a
+        1 x N array of tables, which are stacked into one float tensor.
+        The tensors are placed on the current CUDA device when CUDA is
+        available and stay on the CPU otherwise.
+
+        Args:
+            fpath: location of the .mat file
+        Returns:
+            DensePoseTransformData holding the loaded tables
+        """
+        import scipy.io
+
+        # Querying the current CUDA device raises on CPU-only hosts,
+        # so fall back to the CPU there
+        if torch.cuda.is_available():
+            device = torch.cuda.current_device()
+        else:
+            device = torch.device("cpu")
+        uv_symmetry_map = scipy.io.loadmat(fpath)
+        uv_symmetry_map_torch = {}
+        for key in ["U_transforms", "V_transforms"]:
+            map_src = uv_symmetry_map[key]
+            map_dst = [
+                torch.from_numpy(map_src[0, i]).to(dtype=torch.float)
+                for i in range(map_src.shape[1])
+            ]
+            uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to(device=device)
+        return DensePoseTransformData(uv_symmetry_map_torch)
+
+
+class DensePoseDataRelative(object):
+    """
+    Dense pose relative annotations that can be applied to any bounding box:
+        x - normalized X coordinates [0, 255] of annotated points
+        y - normalized Y coordinates [0, 255] of annotated points
+        i - body part labels 0,...,24 for annotated points
+        u - body part U coordinates [0, 1] for annotated points
+        v - body part V coordinates [0, 1] for annotated points
+        segm - 256x256 segmentation mask with values 0,...,14
+    To obtain absolute x and y data wrt some bounding box one needs to first
+    divide the data by 256, multiply by the respective bounding box size
+    and add bounding box offset:
+        x_img = x0 + x_norm * w / 256.0
+        y_img = y0 + y_norm * h / 256.0
+    Segmentation masks are typically sampled to get image-based masks.
+    """
+
+    # Key for normalized X coordinates in annotation dict
+    X_KEY = "dp_x"
+    # Key for normalized Y coordinates in annotation dict
+    Y_KEY = "dp_y"
+    # Key for U part coordinates in annotation dict
+    U_KEY = "dp_U"
+    # Key for V part coordinates in annotation dict
+    V_KEY = "dp_V"
+    # Key for I point labels in annotation dict
+    I_KEY = "dp_I"
+    # Key for segmentation mask in annotation dict
+    S_KEY = "dp_masks"
+    # Number of body parts in segmentation masks
+    N_BODY_PARTS = 14
+    # Number of parts in point labels
+    N_PART_LABELS = 24
+    # Side length of the square segmentation mask
+    MASK_SIZE = 256
+
+    def __init__(self, annotation, cleanup=False):
+        """
+        Args:
+            annotation (dict): annotation that provides all DensePose keys;
+                an AssertionError is raised when any of them is missing
+            cleanup (bool): when True, the DensePose keys are removed from
+                `annotation` once the data has been extracted
+        """
+        is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
+        assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
+        self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
+        self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
+        self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
+        self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
+        self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
+        self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
+        self.device = torch.device("cpu")
+        if cleanup:
+            DensePoseDataRelative.cleanup_annotation(annotation)
+
+    def to(self, device):
+        """
+        Return the data with all tensors moved to `device`. `self` is
+        returned as is when it is already marked as living on that device.
+        """
+        if self.device == device:
+            return self
+        # __new__ bypasses __init__, which expects an annotation dict to parse
+        new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
+        new_data.x = self.x.to(device)
+        new_data.y = self.y.to(device)
+        new_data.i = self.i.to(device)
+        new_data.u = self.u.to(device)
+        new_data.v = self.v.to(device)
+        new_data.segm = self.segm.to(device)
+        new_data.device = device
+        return new_data
+
+    @staticmethod
+    def extract_segmentation_mask(annotation):
+        """
+        Decode the per-part masks stored under S_KEY and merge them into one
+        MASK_SIZE x MASK_SIZE float label map. Part `i` is written with label
+        `i + 1`; empty entries are skipped and later parts overwrite earlier
+        ones wherever they overlap.
+        """
+        import pycocotools.mask as mask_utils
+
+        poly_specs = annotation[DensePoseDataRelative.S_KEY]
+        segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
+        for i in range(DensePoseDataRelative.N_BODY_PARTS):
+            poly_i = poly_specs[i]
+            if poly_i:
+                mask_i = mask_utils.decode(poly_i)
+                segm[mask_i > 0] = i + 1
+        return segm
+
+    @staticmethod
+    def validate_annotation(annotation):
+        """
+        Check that all DensePose keys are present in `annotation`.
+
+        Returns:
+            (True, None) when the annotation is complete, otherwise
+            (False, reason) where reason names the first missing key
+        """
+        for key in [
+            DensePoseDataRelative.X_KEY,
+            DensePoseDataRelative.Y_KEY,
+            DensePoseDataRelative.I_KEY,
+            DensePoseDataRelative.U_KEY,
+            DensePoseDataRelative.V_KEY,
+            DensePoseDataRelative.S_KEY,
+        ]:
+            if key not in annotation:
+                return False, "no {key} data in the annotation".format(key=key)
+        return True, None
+
+    @staticmethod
+    def cleanup_annotation(annotation):
+        """
+        Remove, in place, every DensePose key that is present in `annotation`.
+        """
+        for key in [
+            DensePoseDataRelative.X_KEY,
+            DensePoseDataRelative.Y_KEY,
+            DensePoseDataRelative.I_KEY,
+            DensePoseDataRelative.U_KEY,
+            DensePoseDataRelative.V_KEY,
+            DensePoseDataRelative.S_KEY,
+        ]:
+            if key in annotation:
+                del annotation[key]
+
+    def apply_transform(self, transforms, densepose_transform_data):
+        """
+        Apply image transforms to the point data and to the segmentation
+        mask, in place. Only horizontal flips have an effect.
+        """
+        self._transform_pts(transforms, densepose_transform_data)
+        self._transform_segm(transforms, densepose_transform_data)
+
+    def _transform_pts(self, transforms, dp_transform_data):
+        """
+        Mirror the point X coordinates and remap the point semantics when
+        `transforms` contains an odd number of horizontal flips.
+        """
+        import detectron2.data.transforms as T
+
+        # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+        do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+        if do_hflip:
+            # mirror X within the mask width
+            self.x = self.segm.size(1) - self.x
+            self._flip_iuv_semantics(dp_transform_data)
+
+    def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
+        """
+        Remap part labels and U/V coordinates of the points, in place, to
+        their horizontally mirrored counterparts.
+        """
+        # labels are rewritten below, so part membership is read from a snapshot
+        i_old = self.i.clone()
+        uv_symmetries = dp_transform_data.uv_symmetries
+        pt_label_symmetries = dp_transform_data.point_label_symmetries
+        for i in range(self.N_PART_LABELS):
+            if i + 1 in i_old:
+                annot_indices_i = i_old == i + 1
+                if pt_label_symmetries[i + 1] != i + 1:
+                    self.i[annot_indices_i] = pt_label_symmetries[i + 1]
+                # U/V values are scaled by 255 and truncated to get indices
+                # into the symmetry table of part `i`
+                u_loc = (self.u[annot_indices_i] * 255).long()
+                v_loc = (self.v[annot_indices_i] * 255).long()
+                self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
+                    device=self.u.device
+                )
+                self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
+                    device=self.v.device
+                )
+
+    def _transform_segm(self, transforms, dp_transform_data):
+        """
+        Mirror the segmentation mask and remap its labels when `transforms`
+        contains an odd number of horizontal flips.
+        """
+        import detectron2.data.transforms as T
+
+        # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+        do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+        if do_hflip:
+            self.segm = torch.flip(self.segm, [1])
+            self._flip_segm_semantics(dp_transform_data)
+
+    def _flip_segm_semantics(self, dp_transform_data):
+        """
+        Replace, in place, every mask label with its mirrored counterpart.
+        """
+        # labels are rewritten below, so label membership is read from a snapshot
+        old_segm = self.segm.clone()
+        mask_label_symmetries = dp_transform_data.mask_label_symmetries
+        for i in range(self.N_BODY_PARTS):
+            if mask_label_symmetries[i + 1] != i + 1:
+                self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
+
+
+def normalized_coords_transform(x0, y0, w, h):
+    """
+    Build a point transform for the box with top left corner (x0, y0) and
+    size (w, h): the top left corner is mapped to (-1, -1) and the bottom
+    right corner to (1, 1). Used to initialize the sampling grid for
+    torch's grid_sample.
+
+    Returns:
+        function that takes a point `p` (indexable, x first, then y)
+        and returns the normalized (x, y) tuple
+    """
+
+    def _to_normalized(p):
+        x_norm = 2 * (p[0] - x0) / w - 1
+        y_norm = 2 * (p[1] - y0) / h - 1
+        return (x_norm, y_norm)
+
+    return _to_normalized
+
+
+class DensePoseOutput(object):
+    """
+    Raw DensePose predictions for a batch of detections: coarse segmentation
+    S, fine segmentation I, U and V coordinates and optional confidences.
+    """
+
+    def __init__(self, S, I, U, V, confidences):
+        """
+        Args:
+            S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
+            I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
+            U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
+            V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
+            confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
+        """
+        self.S = S
+        self.I = I  # noqa: E741
+        self.U = U
+        self.V = V
+        self.confidences = confidences
+        self._check_output_dims(S, I, U, V)
+
+    def _check_output_dims(self, S, I, U, V):
+        """
+        Assert that all outputs are 4D, that S and I agree on the number of
+        instances and on the plane size, and that I, U and V have equal shapes.
+        """
+        # Each message reports the tensor that actually failed the check
+        for label, tensor in (
+            ("Segmentation", S),
+            ("Part index", I),
+            ("U coordinates", U),
+            ("V coordinates", V),
+        ):
+            assert (
+                len(tensor.size()) == 4
+            ), "{} output should have 4 dimensions (NCHW), but has size {}".format(
+                label, tensor.size()
+            )
+        assert len(S) == len(I), (
+            "Number of output segmentation planes {} "
+            "should be equal to the number of output part index "
+            "planes {}".format(len(S), len(I))
+        )
+        assert S.size()[2:] == I.size()[2:], (
+            "Output segmentation plane size {} "
+            "should be equal to the output part index "
+            "plane size {}".format(S.size()[2:], I.size()[2:])
+        )
+        assert I.size() == U.size(), (
+            "Part index output shape {} "
+            "should be the same as U coordinates output shape {}".format(I.size(), U.size())
+        )
+        assert I.size() == V.size(), (
+            "Part index output shape {} "
+            "should be the same as V coordinates output shape {}".format(I.size(), V.size())
+        )
+
+    def resize(self, image_size_hw):
+        # do nothing - outputs are invariant to resize
+        pass
+
+    def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
+        """
+        Resample S, I, U, V from bbox_old to the cropped bbox_new
+
+        The inputs are per-instance tensors whose dimensions 1 and 2 are
+        taken as height and width. The outputs keep that spatial size.
+        """
+        x0old, y0old, wold, hold = bbox_old_xywh
+        x0new, y0new, wnew, hnew = bbox_new_xywh
+        tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
+        topleft = (x0new, y0new)
+        bottomright = (x0new + wnew, y0new + hnew)
+        # corners of the new box in the [-1, 1] coordinates of the old box
+        topleft_norm = tr_coords(topleft)
+        bottomright_norm = tr_coords(bottomright)
+        hsize = S.size(1)
+        wsize = S.size(2)
+        # the slices guard against an extra sample caused by float rounding
+        grid = torch.meshgrid(
+            torch.arange(
+                topleft_norm[1],
+                bottomright_norm[1],
+                (bottomright_norm[1] - topleft_norm[1]) / hsize,
+            )[:hsize],
+            torch.arange(
+                topleft_norm[0],
+                bottomright_norm[0],
+                (bottomright_norm[0] - topleft_norm[0]) / wsize,
+            )[:wsize],
+        )
+        # NOTE(review): the last dimension of the stacked grid appears to hold
+        # (y, x) pairs, while F.grid_sample documents (x, y) - confirm before
+        # cropping gets re-enabled in `crop`
+        grid = torch.stack(grid, dim=2).to(S.device)
+        assert (
+            grid.size(0) == hsize
+        ), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
+        assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
+            wsize, grid.size(1)
+        )
+        # the same resampling is applied to each of the four outputs
+        S_new, I_new, U_new, V_new = (
+            F.grid_sample(
+                tensor.unsqueeze(0),
+                torch.unsqueeze(grid, 0),
+                mode="bilinear",
+                padding_mode="border",
+                align_corners=True,
+            ).squeeze(0)
+            for tensor in (S, I, U, V)
+        )
+        return S_new, I_new, U_new, V_new
+
+    def crop(self, indices_cropped, bboxes_old, bboxes_new):
+        """
+        Crop outputs for selected bounding boxes to the new bounding boxes.
+        Currently a no-op.
+        """
+        # VK: cropping is ignored for now
+        # for i, ic in enumerate(indices_cropped):
+        #    self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
+        #        self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
+        #        bboxes_old[i], bboxes_new[i])
+        pass
+
+    def hflip(self, transform_data: DensePoseTransformData) -> None:
+        """
+        Change S, I, U and V to take into account a Horizontal flip.
+        Nothing happens when there are no instances.
+        """
+        if self.I.shape[0] > 0:
+            # mirror all outputs along the width dimension ...
+            for el in "SIUV":
+                self.__dict__[el] = torch.flip(self.__dict__[el], [3])
+            # ... then remap the semantics of the mirrored data
+            self._flip_iuv_semantics_tensor(transform_data)
+            self._flip_segm_semantics_tensor(transform_data)
+
+    def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
+        """
+        Remap U/V values of all channels but the first one through the
+        symmetry tables, then permute the I, U and V channels according to
+        the point label symmetries.
+        """
+        point_label_symmetries = dp_transform_data.point_label_symmetries
+        uv_symmetries = dp_transform_data.uv_symmetries
+
+        N, C, H, W = self.U.shape
+        # U/V values are clamped to [0, 1], scaled by 255 and truncated to
+        # get indices into the symmetry tables
+        u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
+        v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
+        # table index per channel: channel c (c >= 1) uses table c - 1
+        Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
+            N, C - 1, H, W
+        )
+        self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to(
+            device=self.U.device
+        )
+        self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to(
+            device=self.V.device
+        )
+
+        for el in "IUV":
+            self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
+
+    def _flip_segm_semantics_tensor(self, dp_transform_data):
+        """
+        Permute the S channels according to the mask label symmetries; done
+        only when S has one channel per body part plus one extra channel.
+        """
+        if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
+            self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
+
+    def to_result(self, boxes_xywh):
+        """
+        Convert DensePose outputs to results format. Results are more compact,
+        but cannot be resampled any more
+        """
+        result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
+        return result
+
+    def __getitem__(self, item):
+        """
+        Select a subset of instances. An integer index yields an output with
+        a single instance (the batch dimension is kept); any other index is
+        applied to the tensors as is.
+        """
+        if isinstance(item, int):
+            S_selected = self.S[item].unsqueeze(0)
+            I_selected = self.I[item].unsqueeze(0)
+            U_selected = self.U[item].unsqueeze(0)
+            V_selected = self.V[item].unsqueeze(0)
+            conf_selected = {}
+            for key in self.confidences:
+                conf_selected[key] = self.confidences[key][item].unsqueeze(0)
+        else:
+            S_selected = self.S[item]
+            I_selected = self.I[item]
+            U_selected = self.U[item]
+            V_selected = self.V[item]
+            conf_selected = {}
+            for key in self.confidences:
+                conf_selected[key] = self.confidences[key][item]
+        return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
+
+    def __str__(self):
+        s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
+            list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
+        )
+        s_conf = "confidences: [{}]".format(
+            ", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
+        )
+        return ", ".join([s, s_conf])
+
+    def __len__(self):
+        # number of instances
+        return self.S.size(0)
+
+
+class DensePoseResult(object):
+    def __init__(self, boxes_xywh, S, I, U, V):
+        self.results = []
+        self.boxes_xywh = boxes_xywh.cpu().tolist()
+        assert len(boxes_xywh.size()) == 2
+        assert boxes_xywh.size(1) == 4
+        for i, box_xywh in enumerate(boxes_xywh):
+            result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
+            result_numpy_i = result_i.cpu().numpy()
+            result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
+            result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
+            self.results.append(result_encoded_with_shape_i)
+
+    def __str__(self):
+        s = "DensePoseResult: N={} [{}]".format(
+            len(self.results), ", ".join([str(list(r[0])) for r in self.results])
+        )
+        return s
+
+    def _output_to_result(self, box_xywh, S, I, U, V):
+        x, y, w, h = box_xywh
+        w = max(int(w), 1)
+        h = max(int(h), 1)
+        result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
+        assert (
+            len(S.size()) == 4
+        ), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
+        s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+        assert (
+            len(I.size()) == 4
+        ), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
+        i_bbox = (
+            F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+            * (s_bbox > 0).long()
+        ).squeeze(0)
+        assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
+            4, len(U.size())
+        )
+        u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
+        assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
+            4, len(V.size())
+        )
+        v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
+        result[0] = i_bbox
+        for part_id in range(1, u_bbox.size(1)):
+            result[1][i_bbox == part_id] = (
+                (u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
+            )
+            result[2][i_bbox == part_id] = (
+                (v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
+            )
+        assert (
+            result.size(1) == h
+        ), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h)
+        assert (
+            result.size(2) == w
+        ), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w)
+        return result
+
+    @staticmethod
+    def encode_png_data(arr):
+        """
+        Encode array data as a PNG image using the highest compression rate
+        @param arr [in] Data stored in an array of size (3, M, N) of type uint8
+        @return Base64-encoded string containing PNG-compressed data
+        """
+        assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
+            len(arr.shape)
+        )
+        assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
+            arr.shape[0]
+        )
+        assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format(
+            arr.dtype
+        )
+        data = np.moveaxis(arr, 0, -1)
+        im = Image.fromarray(data)
+        fstream = BytesIO()
+        im.save(fstream, format="png", optimize=True)
+        s = base64.encodebytes(fstream.getvalue()).decode()
+        return s
+
+    @staticmethod
+    def decode_png_data(shape, s):
+        """
+        Decode array data from a string that contains PNG-compressed data
+        @param Base64-encoded string containing PNG-compressed data
+        @return Data stored in an array of size (3, M, N) of type uint8
+        """
+        fstream = BytesIO(base64.decodebytes(s.encode()))
+        im = Image.open(fstream)
+        data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
+        return data.reshape(shape)
+
+    def __len__(self):
+        """Return the number of entries stored in `self.results`."""
+        return len(self.results)
+
+    def __getitem__(self, item):
+        result_encoded = self.results[item]
+        bbox_xywh = self.boxes_xywh[item]
+        return result_encoded, bbox_xywh
+
+
+class DensePoseList(object):
+
+    _TORCH_DEVICE_CPU = torch.device("cpu")
+
+    def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
+        assert len(densepose_datas) == len(
+            boxes_xyxy_abs
+        ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
+            len(densepose_datas), len(boxes_xyxy_abs)
+        )
+        self.densepose_datas = []
+        for densepose_data in densepose_datas:
+            assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
+                "Attempt to initialize DensePoseList with DensePose datas "
+                "of type {}, expected DensePoseDataRelative".format(type(densepose_data))
+            )
+            densepose_data_ondevice = (
+                densepose_data.to(device) if densepose_data is not None else None
+            )
+            self.densepose_datas.append(densepose_data_ondevice)
+        self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
+        self.image_size_hw = image_size_hw
+        self.device = device
+
+    def to(self, device):
+        if self.device == device:
+            return self
+        return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
+
+    def __iter__(self):
+        return iter(self.densepose_datas)
+
+    def __len__(self):
+        return len(self.densepose_datas)
+
+    def __repr__(self):
+        s = self.__class__.__name__ + "("
+        s += "num_instances={}, ".format(len(self.densepose_datas))
+        s += "image_width={}, ".format(self.image_size_hw[1])
+        s += "image_height={})".format(self.image_size_hw[0])
+        return s
+
+    def __getitem__(self, item):
+        if isinstance(item, int):
+            densepose_data_rel = self.densepose_datas[item]
+            return densepose_data_rel
+        elif isinstance(item, slice):
+            densepose_datas_rel = self.densepose_datas[item]
+            boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+            return DensePoseList(
+                densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+            )
+        elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
+            densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
+            boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+            return DensePoseList(
+                densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+            )
+        else:
+            densepose_datas_rel = [self.densepose_datas[i] for i in item]
+            boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+            return DensePoseList(
+                densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+            )
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import contextlib
+import copy
+import io
+import itertools
+import json
+import logging
+import os
+from collections import OrderedDict
+import torch
+from fvcore.common.file_io import PathManager
+from pycocotools.coco import COCO
+
+from detectron2.data import MetadataCatalog
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.structures import BoxMode
+from detectron2.utils.comm import all_gather, is_main_process, synchronize
+from detectron2.utils.logger import create_small_table
+
+from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
+
+
+class DensePoseCOCOEvaluator(DatasetEvaluator):
+    def __init__(self, dataset_name, distributed, output_dir=None):
+        self._distributed = distributed
+        self._output_dir = output_dir
+
+        self._cpu_device = torch.device("cpu")
+        self._logger = logging.getLogger(__name__)
+
+        self._metadata = MetadataCatalog.get(dataset_name)
+        json_file = PathManager.get_local_path(self._metadata.json_file)
+        with contextlib.redirect_stdout(io.StringIO()):
+            self._coco_api = COCO(json_file)
+
+    def reset(self):
+        self._predictions = []
+
+    def process(self, inputs, outputs):
+        """
+        Args:
+            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+                It is a list of dict. Each dict corresponds to an image and
+                contains keys like "height", "width", "file_name", "image_id".
+            outputs: the outputs of a COCO model. It is a list of dicts with key
+                "instances" that contains :class:`Instances`.
+                The :class:`Instances` object needs to have `densepose` field.
+        """
+        for input, output in zip(inputs, outputs):
+            instances = output["instances"].to(self._cpu_device)
+
+            boxes = instances.pred_boxes.tensor.clone()
+            boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+            instances.pred_densepose = instances.pred_densepose.to_result(boxes)
+
+            json_results = prediction_to_json(instances, input["image_id"])
+            self._predictions.extend(json_results)
+
+    def evaluate(self):
+        if self._distributed:
+            synchronize()
+            predictions = all_gather(self._predictions)
+            predictions = list(itertools.chain(*predictions))
+            if not is_main_process():
+                return
+        else:
+            predictions = self._predictions
+
+        return copy.deepcopy(self._eval_predictions(predictions))
+
+    def _eval_predictions(self, predictions):
+        """
+        Evaluate predictions on densepose.
+        Return results with the metrics of the tasks.
+        """
+        self._logger.info("Preparing results for COCO format ...")
+
+        if self._output_dir:
+            file_path = os.path.join(self._output_dir, "coco_densepose_results.json")
+            with open(file_path, "w") as f:
+                json.dump(predictions, f)
+                f.flush()
+                os.fsync(f.fileno())
+
+        self._logger.info("Evaluating predictions ...")
+        res = OrderedDict()
+        results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions)
+        res["densepose_gps"] = results_gps
+        res["densepose_gpsm"] = results_gpsm
+        return res
+
+
+def prediction_to_json(instances, img_id):
+    """
+    Args:
+        instances (Instances): the output of the model
+        img_id (str): the image id in COCO
+
+    Returns:
+        list[dict]: the results in densepose evaluation format
+    """
+    scores = instances.scores.tolist()
+
+    results = []
+    for k in range(len(instances)):
+        densepose = instances.pred_densepose[k]
+        result = {
+            "image_id": img_id,
+            "category_id": 1,  # densepose only has one class
+            "bbox": densepose[1],
+            "score": scores[k],
+            "densepose": densepose,
+        }
+        results.append(result)
+    return results
+
+
+def _evaluate_predictions_on_coco(coco_gt, coco_results):
+    metrics = ["AP", "AP50", "AP75", "APm", "APl"]
+
+    logger = logging.getLogger(__name__)
+
+    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
+        logger.warn("No predictions from the model! Set scores to -1")
+        results_gps = {metric: -1 for metric in metrics}
+        results_gpsm = {metric: -1 for metric in metrics}
+        return results_gps, results_gpsm
+
+    coco_dt = coco_gt.loadRes(coco_results)
+    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
+    logger.info(
+        "Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
+    )
+    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
+    logger.info(
+        "Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
+    )
+    return results_gps, results_gpsm
+
+
+def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics):
+    coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+    return results
+
+
+def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics):
+    coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+    return results
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py
@@ -0,0 +1,75 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA
+
+
+class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
+    def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
+        """
+        Args:
+            cfg (CfgNode):
+            model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
+            transform_data (DensePoseTransformData): contains symmetry label
+                transforms used for horizontal flip
+            tta_mapper (callable): takes a dataset dict and returns a list of
+                augmented versions of the dataset dict. Defaults to
+                `DatasetMapperTTA(cfg)`.
+            batch_size (int): batch the augmented images into this batch size for inference.
+        """
+        self._transform_data = transform_data
+        super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
+
+    # the implementation follows closely the one from detectron2/modeling
+    def _inference_one_image(self, input):
+        """
+        Args:
+            input (dict): one dataset dict
+
+        Returns:
+            dict: one output dict
+        """
+
+        augmented_inputs, aug_vars = self._get_augmented_inputs(input)
+        # Detect boxes from all augmented versions
+        with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
+            # temporarily disable roi heads
+            all_boxes, all_scores, all_classes = self._get_augmented_boxes(
+                augmented_inputs, aug_vars
+            )
+        merged_instances = self._merge_detections(
+            all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"])
+        )
+
+        if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
+            # Use the detected boxes to obtain new fields
+            augmented_instances = self._rescale_detected_boxes(
+                augmented_inputs, merged_instances, aug_vars
+            )
+            # run forward on the detected boxes
+            outputs = self._batch_inference(
+                augmented_inputs, augmented_instances, do_postprocess=False
+            )
+            # Delete now useless variables to avoid being out of memory
+            del augmented_inputs, augmented_instances, merged_instances
+            # average the predictions
+            if self.cfg.MODEL.MASK_ON:
+                outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars)
+            if self.cfg.MODEL.DENSEPOSE_ON:
+                outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars)
+            # postprocess
+            output = self._detector_postprocess(outputs[0], aug_vars)
+            return {"instances": output}
+        else:
+            return {"instances": merged_instances}
+
+    def _reduce_pred_densepose(self, outputs, aug_vars):
+        for idx, output in enumerate(outputs):
+            if aug_vars["do_hflip"][idx]:
+                output.pred_densepose.hflip(self._transform_data)
+        # Less memory-intensive averaging
+        for attr in "SIUV":
+            setattr(
+                outputs[0].pred_densepose,
+                attr,
+                sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs),
+            )
+        return outputs[0].pred_densepose
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import numpy as np
+from typing import Dict
+import fvcore.nn.weight_init as weight_init
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from detectron2.layers import Conv2d, ShapeSpec, get_norm
+from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import select_foreground_proposals
+
+from .densepose_head import (
+    build_densepose_data_filter,
+    build_densepose_head,
+    build_densepose_losses,
+    build_densepose_predictor,
+    densepose_inference,
+)
+
+
+class Decoder(nn.Module):
+    """
+    A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
+    (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
+    all levels of the FPN into single output.
+    """
+
+    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
+        super(Decoder, self).__init__()
+
+        # fmt: off
+        self.in_features      = in_features
+        feature_strides       = {k: v.stride for k, v in input_shape.items()}
+        feature_channels      = {k: v.channels for k, v in input_shape.items()}
+        num_classes           = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
+        conv_dims             = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
+        self.common_stride    = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
+        norm                  = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
+        # fmt: on
+
+        self.scale_heads = []
+        for in_feature in self.in_features:
+            head_ops = []
+            head_length = max(
+                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
+            )
+            for k in range(head_length):
+                conv = Conv2d(
+                    feature_channels[in_feature] if k == 0 else conv_dims,
+                    conv_dims,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias=not norm,
+                    norm=get_norm(norm, conv_dims),
+                    activation=F.relu,
+                )
+                weight_init.c2_msra_fill(conv)
+                head_ops.append(conv)
+                if feature_strides[in_feature] != self.common_stride:
+                    head_ops.append(
+                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
+                    )
+            self.scale_heads.append(nn.Sequential(*head_ops))
+            self.add_module(in_feature, self.scale_heads[-1])
+        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
+        weight_init.c2_msra_fill(self.predictor)
+
+    def forward(self, features):
+        for i, _ in enumerate(self.in_features):
+            if i == 0:
+                x = self.scale_heads[i](features[i])
+            else:
+                x = x + self.scale_heads[i](features[i])
+        x = self.predictor(x)
+        return x
+
+
+@ROI_HEADS_REGISTRY.register()
+class DensePoseROIHeads(StandardROIHeads):
+    """
+    A Standard ROIHeads which contains an addition of DensePose head.
+    """
+
+    def __init__(self, cfg, input_shape):
+        super().__init__(cfg, input_shape)
+        self._init_densepose_head(cfg, input_shape)
+
+    def _init_densepose_head(self, cfg, input_shape):
+        # fmt: off
+        self.densepose_on          = cfg.MODEL.DENSEPOSE_ON
+        if not self.densepose_on:
+            return
+        self.densepose_data_filter = build_densepose_data_filter(cfg)
+        dp_pooler_resolution       = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
+        dp_pooler_sampling_ratio   = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
+        dp_pooler_type             = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
+        self.use_decoder           = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
+        # fmt: on
+        if self.use_decoder:
+            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
+        else:
+            dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
+        in_channels = [input_shape[f].channels for f in self.in_features][0]
+
+        if self.use_decoder:
+            self.decoder = Decoder(cfg, input_shape, self.in_features)
+
+        self.densepose_pooler = ROIPooler(
+            output_size=dp_pooler_resolution,
+            scales=dp_pooler_scales,
+            sampling_ratio=dp_pooler_sampling_ratio,
+            pooler_type=dp_pooler_type,
+        )
+        self.densepose_head = build_densepose_head(cfg, in_channels)
+        self.densepose_predictor = build_densepose_predictor(
+            cfg, self.densepose_head.n_out_channels
+        )
+        self.densepose_losses = build_densepose_losses(cfg)
+
+    def _forward_densepose(self, features, instances):
+        """
+        Forward logic of the densepose prediction branch.
+
+        Args:
+            features (list[Tensor]): #level input features for densepose prediction
+            instances (list[Instances]): the per-image instances to train/predict densepose.
+                In training, they can be the proposals.
+                In inference, they can be the predicted boxes.
+
+        Returns:
+            In training, a dict of losses.
+            In inference, update `instances` with new fields "densepose" and return it.
+        """
+        if not self.densepose_on:
+            return {} if self.training else instances
+
+        features = [features[f] for f in self.in_features]
+        if self.training:
+            proposals, _ = select_foreground_proposals(instances, self.num_classes)
+            proposals_dp = self.densepose_data_filter(proposals)
+            if len(proposals_dp) > 0:
+                # NOTE may deadlock in DDP if certain workers have empty proposals_dp
+                proposal_boxes = [x.proposal_boxes for x in proposals_dp]
+
+                if self.use_decoder:
+                    features = [self.decoder(features)]
+
+                features_dp = self.densepose_pooler(features, proposal_boxes)
+                densepose_head_outputs = self.densepose_head(features_dp)
+                densepose_outputs, _, confidences, _ = self.densepose_predictor(
+                    densepose_head_outputs
+                )
+                densepose_loss_dict = self.densepose_losses(
+                    proposals_dp, densepose_outputs, confidences
+                )
+                return densepose_loss_dict
+        else:
+            pred_boxes = [x.pred_boxes for x in instances]
+
+            if self.use_decoder:
+                features = [self.decoder(features)]
+
+            features_dp = self.densepose_pooler(features, pred_boxes)
+            if len(features_dp) > 0:
+                densepose_head_outputs = self.densepose_head(features_dp)
+                densepose_outputs, _, confidences, _ = self.densepose_predictor(
+                    densepose_head_outputs
+                )
+            else:
+                # If no detection occurred instances
+                # set densepose_outputs to empty tensors
+                empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
+                densepose_outputs = tuple([empty_tensor] * 4)
+                confidences = tuple([empty_tensor] * 4)
+
+            densepose_inference(densepose_outputs, confidences, instances)
+            return instances
+
+    def forward(self, images, features, proposals, targets=None):
+        instances, losses = super().forward(images, features, proposals, targets)
+        del targets, images
+
+        if self.training:
+            losses.update(self._forward_densepose(features, instances))
+        return instances, losses
+
+    def forward_with_given_boxes(self, features, instances):
+        """
+        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
+
+        This is useful for downstream tasks where a box is known, but need to obtain
+        other attributes (outputs of other heads).
+        Test-time augmentation also uses this.
+
+        Args:
+            features: same as in `forward()`
+            instances (list[Instances]): instances to predict other outputs. Expect the keys
+                "pred_boxes" and "pred_classes" to exist.
+
+        Returns:
+            instances (list[Instances]):
+                the same `Instances` objects, with extra
+                fields such as `pred_masks` or `pred_keypoints`.
+        """
+
+        instances = super().forward_with_given_boxes(features, instances)
+        instances = self._forward_densepose(features, instances)
+        return instances
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py
@@ -0,0 +1,145 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from typing import Any, Dict, Optional, Tuple
+
+
+class EntrySelector(object):
+    """
+    Base class for entry selectors
+    """
+
+    @staticmethod
+    def from_string(spec: str) -> "EntrySelector":
+        if spec == "*":
+            return AllEntrySelector()
+        return FieldEntrySelector(spec)
+
+
+class AllEntrySelector(EntrySelector):
+    """
+    Selector that accepts all entries
+    """
+
+    # Same literal that `EntrySelector.from_string` maps to this selector
+    SPECIFIER = "*"
+
+    def __call__(self, entry):
+        """Accept `entry` unconditionally; the argument is not inspected."""
+        return True
+
+
+class FieldEntrySelector(EntrySelector):
+    """
+    Selector that accepts only entries that match provided field
+    specifier(s). Only a limited set of specifiers is supported for now:
+      <specifiers>::=<specifier>[<comma><specifiers>]
+      <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range>
+      <field_name> is a valid identifier
+      <type> ::= "int" | "str"
+      <equal> ::= "="
+      <comma> ::= ","
+      <type_delim> ::= ":"
+      <value_or_range> ::= <value> | <range>
+      <range> ::= <value><range_delim><value>
+      <range_delim> ::= "-"
+      <value> is a string without spaces and special symbols
+        (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
+    """
+
+    _SPEC_DELIM = ","
+    _TYPE_DELIM = ":"
+    _RANGE_DELIM = "-"
+    _EQUAL = "="
+    _ERROR_PREFIX = "Invalid field selector specifier"
+
+    class _FieldEntryValuePredicate(object):
+        """
+        Predicate that checks strict equality for the specified entry field
+        """
+
+        def __init__(self, name: str, typespec: str, value: str):
+            import builtins
+
+            self.name = name
+            self.type = getattr(builtins, typespec) if typespec is not None else str
+            self.value = value
+
+        def __call__(self, entry):
+            return entry[self.name] == self.type(self.value)
+
+    class _FieldEntryRangePredicate(object):
+        """
+        Predicate that checks whether an entry field falls into the specified range
+        """
+
+        def __init__(self, name: str, typespec: str, vmin: str, vmax: str):
+            import builtins
+
+            self.name = name
+            self.type = getattr(builtins, typespec) if typespec is not None else str
+            self.vmin = vmin
+            self.vmax = vmax
+
+        def __call__(self, entry):
+            return (entry[self.name] >= self.type(self.vmin)) and (
+                entry[self.name] <= self.type(self.vmax)
+            )
+
+    def __init__(self, spec: str):
+        self._predicates = self._parse_specifier_into_predicates(spec)
+
+    def __call__(self, entry: Dict[str, Any]):
+        for predicate in self._predicates:
+            if not predicate(entry):
+                return False
+        return True
+
+    def _parse_specifier_into_predicates(self, spec: str):
+        predicates = []
+        specs = spec.split(self._SPEC_DELIM)
+        for subspec in specs:
+            eq_idx = subspec.find(self._EQUAL)
+            if eq_idx > 0:
+                field_name_with_type = subspec[:eq_idx]
+                field_name, field_type = self._parse_field_name_type(field_name_with_type)
+                field_value_or_range = subspec[eq_idx + 1 :]
+                if self._is_range_spec(field_value_or_range):
+                    vmin, vmax = self._get_range_spec(field_value_or_range)
+                    predicate = FieldEntrySelector._FieldEntryRangePredicate(
+                        field_name, field_type, vmin, vmax
+                    )
+                else:
+                    predicate = FieldEntrySelector._FieldEntryValuePredicate(
+                        field_name, field_type, field_value_or_range
+                    )
+                predicates.append(predicate)
+            elif eq_idx == 0:
+                self._parse_error(f'"{subspec}", field name is empty!')
+            else:
+                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
+        return predicates
+
+    def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
+        type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
+        if type_delim_idx > 0:
+            field_name = field_name_with_type[:type_delim_idx]
+            field_type = field_name_with_type[type_delim_idx + 1 :]
+        elif type_delim_idx == 0:
+            self._parse_error(f'"{field_name_with_type}", field name is empty!')
+        else:
+            field_name = field_name_with_type
+            field_type = None
+        return field_name, field_type
+
+    def _is_range_spec(self, field_value_or_range):
+        delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+        return delim_idx > 0
+
+    def _get_range_spec(self, field_value_or_range):
+        if self._is_range_spec(field_value_or_range):
+            delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+            vmin = field_value_or_range[:delim_idx]
+            vmax = field_value_or_range[delim_idx + 1 :]
+            return vmin, vmax
+        else:
+            self._parse_error('"field_value_or_range", range of values expected!')
+
+    def _parse_error(self, msg):
+        raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+
+
+def verbosity_to_level(verbosity):
+    if verbosity is not None:
+        if verbosity == 0:
+            return logging.WARNING
+        elif verbosity == 1:
+            return logging.INFO
+        elif verbosity >= 2:
+            return logging.DEBUG
+    return logging.WARNING
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py
@@ -0,0 +1,16 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from fvcore.common.file_io import PathManager
+
+from detectron2.data import MetadataCatalog
+
+from densepose import DensePoseTransformData
+
+
+def load_for_dataset(dataset_name):
+    path = MetadataCatalog.get(dataset_name).densepose_transform_src
+    densepose_transform_data_fpath = PathManager.get_local_path(path)
+    return DensePoseTransformData.load(densepose_transform_data_fpath)
+
+
+def load_from_cfg(cfg):
+    return load_for_dataset(cfg.DATASETS.TEST[0])
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py
@@ -0,0 +1,191 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import numpy as np
+import cv2
+import torch
+
+Image = np.ndarray  # alias used in annotations for images
+Boxes = torch.Tensor  # alias used in annotations for bounding boxes
+
+
+class MatrixVisualizer(object):
+    """
+    Base visualizer for matrix data: color-maps a 2D matrix and alpha-blends it into
+    the bounding box region of a BGR image; pixels where the mask is 0 stay unblended.
+    """
+
+    def __init__(
+        self,
+        inplace=True,
+        cmap=cv2.COLORMAP_PARULA,
+        val_scale=1.0,
+        alpha=0.7,
+        interp_method_matrix=cv2.INTER_LINEAR,
+        interp_method_mask=cv2.INTER_NEAREST,
+    ):
+        self.inplace = inplace
+        self.cmap = cmap
+        self.val_scale = val_scale
+        self.alpha = alpha
+        self.interp_method_matrix = interp_method_matrix
+        self.interp_method_mask = interp_method_mask
+
+    def visualize(self, image_bgr, mask, matrix, bbox_xywh):
+        """Blend the color-mapped `matrix * val_scale` into the box; returns a uint8 image."""
+        self._check_image(image_bgr)
+        self._check_mask_matrix(mask, matrix)
+        # with inplace=False the drawing target is an all-zero image of the same shape
+        image_target_bgr = image_bgr if self.inplace else image_bgr * 0
+        x, y, w, h = [int(v) for v in bbox_xywh]
+        if w <= 0 or h <= 0:
+            return image_bgr
+        mask, matrix = self._resize(mask, matrix, w, h)
+        mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
+        matrix_scaled = matrix.astype(np.float32) * self.val_scale
+        _EPSILON = 1e-6
+        if np.any(matrix_scaled > 255 + _EPSILON):
+            logging.getLogger(__name__).warning(
+                f"Matrix has values > {255 + _EPSILON} after scaling, clipping to [0..255]"
+            )
+        matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
+        matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
+        roi = image_target_bgr[y : y + h, x : x + w, :]  # view: writes reach the target image
+        matrix_vis[mask_bg] = roi[mask_bg]  # background keeps the target's own colors
+        roi[:] = roi * (1.0 - self.alpha) + matrix_vis * self.alpha
+        return image_target_bgr.astype(np.uint8)
+
+    def _resize(self, mask, matrix, w, h):
+        """Resize mask and matrix to (w, h) when their shapes differ from it."""
+        # interpolation must be passed by keyword: the third positional argument is `dst`
+        if (w != mask.shape[1]) or (h != mask.shape[0]):
+            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
+        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
+            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
+        return mask, matrix
+
+    def _check_image(self, image_rgb):
+        """Assert an array with 3 dims, 3 channels in the last one, and uint8 dtype."""
+        assert len(image_rgb.shape) == 3 and image_rgb.shape[2] == 3
+        assert image_rgb.dtype == np.uint8
+
+    def _check_mask_matrix(self, mask, matrix):
+        """Assert a 2D matrix and a 2D uint8 mask."""
+        assert len(matrix.shape) == 2 and len(mask.shape) == 2
+        assert mask.dtype == np.uint8
+
+
+class RectangleVisualizer(object):
+    """Draws the outline of an XYWH bounding box with cv2.rectangle."""
+
+    _COLOR_GREEN = (18, 127, 15)
+
+    def __init__(self, color=_COLOR_GREEN, thickness=1):
+        self.color, self.thickness = color, thickness
+
+    def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
+        """Draw `bbox_xywh` on `image_bgr` in place; falsy color / thickness mean the defaults."""
+        x, y, w, h = bbox_xywh
+        bgr, width = color or self.color, thickness or self.thickness
+        cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), bgr, width)
+        return image_bgr
+
+
+class PointsVisualizer(object):
+    """Draws filled circles at the given (x, y) points."""
+
+    _COLOR_GREEN = (18, 127, 15)
+
+    def __init__(self, color_bgr=_COLOR_GREEN, r=5):
+        self.color_bgr, self.r = color_bgr, r
+
+    def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
+        """Draw every point in place; `colors_bgr[j]` / `rs[j]` override the defaults if given."""
+        for j, (x, y) in enumerate(pts_xy):
+            color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
+            r = rs[j] if rs is not None else self.r
+            cv2.circle(image_bgr, (int(x), int(y)), r, color_bgr, -1)  # cv2 rejects float coords
+        return image_bgr
+
+
+class TextVisualizer(object):
+    """
+    Draws text on a BGR image, optionally over a blended frame and / or fill rectangle.
+    A transparency of 1.0 (the default) skips drawing the corresponding rectangle.
+    """
+
+    _COLOR_GRAY = (218, 227, 218)
+    _COLOR_WHITE = (255, 255, 255)
+
+    def __init__(
+        self,
+        font_face=cv2.FONT_HERSHEY_SIMPLEX,
+        font_color_bgr=_COLOR_GRAY,
+        font_scale=0.35,
+        font_line_type=cv2.LINE_AA,
+        font_line_thickness=1,
+        fill_color_bgr=_COLOR_WHITE,
+        fill_color_transparency=1.0,
+        frame_color_bgr=_COLOR_WHITE,
+        frame_color_transparency=1.0,
+        frame_thickness=1,
+    ):
+        self.font_face = font_face
+        self.font_color_bgr = font_color_bgr
+        self.font_scale = font_scale
+        self.font_line_type = font_line_type
+        self.font_line_thickness = font_line_thickness
+        self.fill_color_bgr = fill_color_bgr
+        self.fill_color_transparency = fill_color_transparency
+        self.frame_color_bgr = frame_color_bgr
+        self.frame_color_transparency = frame_color_transparency
+        self.frame_thickness = frame_thickness
+
+    def visualize(self, image_bgr, txt, topleft_xy):
+        """Draw `txt` on `image_bgr` in place, using `topleft_xy` as the cv2.putText origin."""
+        txt_w, txt_h = self.get_text_size_wh(txt)
+        topleft_xy = tuple(map(int, topleft_xy))
+        x, y = topleft_xy
+        # Each rectangle is blended as image * transparency + color * (1 - transparency).
+        # The builtin `float` replaces the `np.float` alias, which NumPy 1.24 removed.
+        if self.frame_color_transparency < 1.0:
+            t = self.frame_thickness
+            image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
+                image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
+                * self.frame_color_transparency
+                + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
+            ).astype(float)
+        if self.fill_color_transparency < 1.0:
+            image_bgr[y : y + txt_h, x : x + txt_w, :] = (
+                image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+                + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
+            ).astype(float)
+        cv2.putText(
+            image_bgr, txt, topleft_xy, self.font_face, self.font_scale,
+            self.font_color_bgr, self.font_line_thickness, self.font_line_type,
+        )
+        return image_bgr
+
+    def get_text_size_wh(self, txt):
+        """Return the (width, height) that cv2.getTextSize reports for `txt`."""
+        (w, h), _ = cv2.getTextSize(txt, self.font_face, self.font_scale, self.font_line_thickness)
+        return w, h
+
+
+class CompoundVisualizer(object):
+    """Chains several visualizers, passing the i-th data item to the i-th visualizer."""
+
+    def __init__(self, visualizers):
+        self.visualizers = visualizers
+
+    def visualize(self, image_bgr, data):
+        """Run all visualizers in order, each one drawing on the previous one's output."""
+        n_data, n_vis = len(data), len(self.visualizers)
+        message = "The number of datas {} should match the number of visualizers {}"
+        assert n_data == n_vis, message.format(n_data, n_vis)
+        for i, visualizer in enumerate(self.visualizers):
+            image_bgr = visualizer.visualize(image_bgr, data[i])
+        return image_bgr
+
+    def __str__(self):
+        """Describe the compound as a comma-separated list of its visualizers."""
+        return "Compound Visualizer [{}]".format(", ".join(str(v) for v in self.visualizers))
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py
@@ -0,0 +1,37 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from .base import RectangleVisualizer, TextVisualizer
+
+
+class BoundingBoxVisualizer(object):  # draws plain (unscored) bounding boxes
+    def __init__(self):
+        self.rectangle_visualizer = RectangleVisualizer()  # default color and thickness
+
+    def visualize(self, image_bgr, boxes_xywh):
+        for bbox_xywh in boxes_xywh:  # every box is drawn on top of the previous result
+            image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
+        return image_bgr
+
+
+class ScoredBoundingBoxVisualizer(object):
+    """Draws bounding boxes and prints each box's score at the box's (x, y) position."""
+
+    def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None):
+        """Each params dict (None = no arguments) is expanded into the visualizer's constructor."""
+        rectangle_kwargs = {} if bbox_visualizer_params is None else bbox_visualizer_params
+        text_kwargs = {} if score_visualizer_params is None else score_visualizer_params
+        self.visualizer_bbox = RectangleVisualizer(**rectangle_kwargs)
+        self.visualizer_score = TextVisualizer(**text_kwargs)
+
+    def visualize(self, image_bgr, scored_bboxes):
+        """Draw all boxes of `scored_bboxes = (boxes_xywh, box_scores)` with their scores."""
+        boxes_xywh, box_scores = scored_bboxes
+        msg = "Number of bounding boxes {} should be equal to the number of scores {}"
+        assert len(boxes_xywh) == len(box_scores), msg.format(len(boxes_xywh), len(box_scores))
+        for i, box_xywh in enumerate(boxes_xywh):
+            score_i = box_scores[i]
+            image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
+            score_txt = "{0:6.4f}".format(score_i)
+            # the score text is anchored at the first two box values (x, y)
+            topleft_xy = box_xywh[0], box_xywh[1]
+            image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
+        return image_bgr
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py
@@ -0,0 +1,593 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import numpy as np
+from typing import Iterable, Optional, Tuple
+import cv2
+
+from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult
+from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
+
+
+class DensePoseResultsVisualizer(object):
+    """Base class: subclasses provide the context hooks and `visualize_iuv_arr`."""
+
+    def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image:
+        if densepose_result is None:
+            return image_bgr
+        context = self.create_visualization_context(image_bgr)
+        for i, encoded_w_shape in enumerate(densepose_result.results):
+            iuv_arr = DensePoseResult.decode_png_data(*encoded_w_shape)
+            self.visualize_iuv_arr(context, iuv_arr, densepose_result.boxes_xywh[i])
+        return self.context_to_image_bgr(context)
+
+
+class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
+    """Color-maps extracted IUV data where the extracted segmentation is positive."""
+
+    def __init__(
+        self,
+        data_extractor,
+        segm_extractor,
+        inplace=True,
+        cmap=cv2.COLORMAP_PARULA,
+        alpha=0.7,
+        val_scale=1.0,
+    ):
+        self.data_extractor = data_extractor
+        self.segm_extractor = segm_extractor
+        self.mask_visualizer = MatrixVisualizer(
+            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
+        )
+
+    def create_visualization_context(self, image_bgr: Image):
+        return image_bgr  # the context is the image itself
+
+    def context_to_image_bgr(self, context):
+        return context
+
+    def get_image_bgr_from_context(self, context):
+        return context
+
+    def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh):
+        image_bgr = self.get_image_bgr_from_context(context)
+        matrix = self.data_extractor(iuv_arr)
+        mask = np.zeros(matrix.shape, dtype=np.uint8)
+        mask[self.segm_extractor(iuv_arr) > 0] = 1
+        return self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
+
+
+def _extract_i_from_iuvarr(iuv_arr):  # first plane (index 0) of the IUV array
+    return iuv_arr[0, :, :]
+
+
+def _extract_u_from_iuvarr(iuv_arr):  # second plane (index 1) of the IUV array
+    return iuv_arr[1, :, :]
+
+
+def _extract_v_from_iuvarr(iuv_arr):  # third plane (index 2) of the IUV array
+    return iuv_arr[2, :, :]
+
+
+class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
+    """Draws U and V contours with matplotlib; the context is a dict holding the figure."""
+
+    def __init__(self, levels=10, **kwargs):
+        self.levels = levels
+        self.plot_args = kwargs  # forwarded verbatim to every plt.contour call
+
+    def create_visualization_context(self, image_bgr: Image):
+        """Open a borderless figure with the image's pixel size and show the image in it."""
+        import matplotlib.pyplot as plt
+        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+
+        dpi = 100
+        height_inches = float(image_bgr.shape[0]) / dpi
+        width_inches = float(image_bgr.shape[1]) / dpi
+        fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
+        plt.axes([0, 0, 1, 1])
+        plt.axis("off")
+        context = {"image_bgr": image_bgr, "fig": fig, "canvas": FigureCanvas(fig)}
+        extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
+        plt.imshow(image_bgr[:, :, ::-1], extent=extent)
+        return context
+
+    def context_to_image_bgr(self, context):
+        """Render the figure to a BGR uint8 array, then close the figure."""
+        import matplotlib.pyplot as plt
+
+        fig = context["fig"]
+        w, h = map(int, fig.get_size_inches() * fig.get_dpi())
+        canvas = context["canvas"]
+        canvas.draw()
+        # np.fromstring (binary mode) and canvas.tostring_rgb are deprecated; read RGBA instead
+        image_rgba = np.frombuffer(canvas.buffer_rgba(), dtype="uint8").reshape(h, w, 4)
+        image_bgr = image_rgba[:, :, 2::-1].copy()  # drop alpha and flip RGB -> BGR
+        plt.close(fig)  # pyplot keeps a reference to every figure until it is closed
+        return image_bgr
+
+    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
+        """Plot contours of U and V (rescaled by 1 / 255) over the bounding box extent."""
+        import matplotlib.pyplot as plt
+
+        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
+        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
+        x0, y0, w, h = bbox_xywh[0], bbox_xywh[1], bbox_xywh[2], bbox_xywh[3]
+        extent = (x0, x0 + w, y0, y0 + h)
+        plt.contour(u, self.levels, extent=extent, **self.plot_args)
+        plt.contour(v, self.levels, extent=extent, **self.plot_args)
+
+
+class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
+    """
+    Contour visualization using marching squares
+    (draws U and V level lines with cv2.line, separately for every body part label)
+    """
+
+    def __init__(self, levels=10, **kwargs):
+        """`levels` is a level count (spread evenly over [0, 1]) or a sequence of level values."""
+        # TODO: colormap is hardcoded
+        cmap = cv2.COLORMAP_PARULA
+        if isinstance(levels, int):
+            self.levels = np.linspace(0, 1, levels)
+        else:
+            # a plain list / tuple would break the `self.levels * 255` scaling below
+            self.levels = np.asarray(levels)
+        if "linewidths" in kwargs:
+            self.linewidths = kwargs["linewidths"]
+        else:
+            self.linewidths = [1] * len(self.levels)
+        self.plot_args = kwargs
+        # one BGR color per level, looked up in the colormap
+        img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
+        self.level_colors_bgr = [
+            [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
+        ]
+
+    def create_visualization_context(self, image_bgr: Image):
+        return image_bgr
+
+    def context_to_image_bgr(self, context):
+        return context
+
+    def get_image_bgr_from_context(self, context):
+        return context
+
+    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
+        """Draw the U and V contours of one detection onto the image, in place."""
+        image_bgr = self.get_image_bgr_from_context(context)
+        segm = _extract_i_from_iuvarr(iuv_arr)
+        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
+        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
+        self._contours(image_bgr, u, segm, bbox_xywh)
+        self._contours(image_bgr, v, segm, bbox_xywh)
+
+    def _contours(self, image_bgr, arr, segm, bbox_xywh):
+        """Draw the level lines of `arr` for each part label found in `segm`."""
+        for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
+            mask = segm == part_idx
+            if not np.any(mask):
+                continue
+            arr_min = np.amin(arr[mask])
+            arr_max = np.amax(arr[mask])
+            # [i0, i1) x [j0, j1) is the bounding window of the part's pixels
+            I, J = np.nonzero(mask)
+            i0 = np.amin(I)
+            i1 = np.amax(I) + 1
+            j0 = np.amin(J)
+            j1 = np.amax(J) + 1
+            # a part confined to a single row or column has no 2x2 cells to process
+            if (j1 == j0 + 1) or (i1 == i0 + 1):
+                continue
+            # Nw / Nh turn array indices into relative [0, 1] coordinates
+            Nw = arr.shape[1] - 1
+            Nh = arr.shape[0] - 1
+            for level_idx, level in enumerate(self.levels):
+                if (level < arr_min) or (level > arr_max):
+                    continue
+                # 4-bit code per 2x2 cell: which corners are at or above the level
+                vp = arr[i0:i1, j0:j1] >= level
+                bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
+                mp = mask[i0:i1, j0:j1]
+                bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
+                it = np.nditer(bin_codes, flags=["multi_index"])
+                color_bgr = self.level_colors_bgr[level_idx]
+                linewidth = self.linewidths[level_idx]
+                while not it.finished:
+                    # codes 0 and 15 mean that no level line crosses the cell
+                    if (it[0] != 0) and (it[0] != 15):
+                        i, j = it.multi_index
+                        # skip cells that have no corner inside the part
+                        if bin_mask_codes[i, j] != 0:
+                            self._draw_line(
+                                image_bgr, arr, mask, level, color_bgr, linewidth,
+                                it[0], it.multi_index, bbox_xywh, Nw, Nh, (i0, j0),
+                            )
+                    it.iternext()
+
+    def _draw_line(
+        self, image_bgr, arr, mask, v, color_bgr, linewidth,
+        bin_code, multi_idx, bbox_xywh, Nw, Nh, offset,
+    ):
+        """Draw the segment(s) of one cell, mapping relative coordinates into the bounding box."""
+        lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
+        x0, y0, w, h = bbox_xywh
+        x1 = x0 + w
+        y1 = y0 + h
+        for line in lines:
+            x0r, y0r = line[0]
+            x1r, y1r = line[1]
+            # relative end points are stretched over the box and truncated to pixels
+            pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
+            pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
+            cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
+
+    def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
+        """
+        Return the line segments (pairs of relative (x, y) points) for one cell code.
+        v0..v3 are the cell's corner values (bits 1, 2, 4, 8 of the code); each end point
+        is found by linear interpolation of the level `v` along a cell edge.
+        """
+        i0, j0 = offset
+        i, j = multi_idx
+        i += i0
+        j += j0
+        v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
+        # (x0i, y0j) is the cell's first corner, (We, He) the cell size, all relative
+        x0i = float(j) / Nw
+        y0j = float(i) / Nh
+        He = 1.0 / Nh
+        We = 1.0 / Nw
+        if (bin_code == 1) or (bin_code == 14):
+            a = (v - v0) / (v1 - v0)
+            b = (v - v0) / (v3 - v0)
+            pt1 = (x0i, y0j + a * He)
+            pt2 = (x0i + b * We, y0j)
+            return [(pt1, pt2)]
+        elif (bin_code == 2) or (bin_code == 13):
+            a = (v - v0) / (v1 - v0)
+            b = (v - v1) / (v2 - v1)
+            pt1 = (x0i, y0j + a * He)
+            pt2 = (x0i + b * We, y0j + He)
+            return [(pt1, pt2)]
+        elif (bin_code == 3) or (bin_code == 12):
+            a = (v - v0) / (v3 - v0)
+            b = (v - v1) / (v2 - v1)
+            pt1 = (x0i + a * We, y0j)
+            pt2 = (x0i + b * We, y0j + He)
+            return [(pt1, pt2)]
+        elif (bin_code == 4) or (bin_code == 11):
+            a = (v - v1) / (v2 - v1)
+            b = (v - v3) / (v2 - v3)
+            pt1 = (x0i + a * We, y0j + He)
+            pt2 = (x0i + We, y0j + b * He)
+            return [(pt1, pt2)]
+        elif (bin_code == 6) or (bin_code == 9):
+            a = (v - v0) / (v1 - v0)
+            b = (v - v3) / (v2 - v3)
+            pt1 = (x0i, y0j + a * He)
+            pt2 = (x0i + We, y0j + b * He)
+            return [(pt1, pt2)]
+        elif (bin_code == 7) or (bin_code == 8):
+            a = (v - v0) / (v3 - v0)
+            b = (v - v3) / (v2 - v3)
+            pt1 = (x0i + a * We, y0j)
+            pt2 = (x0i + We, y0j + b * He)
+            return [(pt1, pt2)]
+        # codes 5 and 10 (diagonally opposite corners above the level) yield two segments
+        elif bin_code == 5:
+            a1 = (v - v0) / (v1 - v0)
+            b1 = (v - v1) / (v2 - v1)
+            pt11 = (x0i, y0j + a1 * He)
+            pt12 = (x0i + b1 * We, y0j + He)
+            a2 = (v - v0) / (v3 - v0)
+            b2 = (v - v3) / (v2 - v3)
+            pt21 = (x0i + a2 * We, y0j)
+            pt22 = (x0i + We, y0j + b2 * He)
+            return [(pt11, pt12), (pt21, pt22)]
+        elif bin_code == 10:
+            a1 = (v - v0) / (v3 - v0)
+            b1 = (v - v0) / (v1 - v0)
+            pt11 = (x0i + a1 * We, y0j)
+            pt12 = (x0i, y0j + b1 * He)
+            a2 = (v - v1) / (v2 - v1)
+            b2 = (v - v3) / (v2 - v3)
+            pt21 = (x0i + a2 * We, y0j + He)
+            pt22 = (x0i + We, y0j + b2 * He)
+            return [(pt11, pt12), (pt21, pt22)]
+        return []
+
+
+try:  # pick the contour visualizer implementation depending on matplotlib availability
+    import matplotlib
+
+    matplotlib.use("Agg")
+    DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
+except ModuleNotFoundError:  # matplotlib is not installed: fall back to the cv2-based version
+    logger = logging.getLogger(__name__)
+    logger.warning("Could not import matplotlib, using custom contour visualizer")
+    DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
+
+
+class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
+    """Color-maps the I plane of the results, scaled by 255 / N_PART_LABELS."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        super().__init__(
+            data_extractor=_extract_i_from_iuvarr,
+            segm_extractor=_extract_i_from_iuvarr,
+            inplace=inplace, cmap=cmap, alpha=alpha,
+            val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
+        )
+
+
+class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
+    """Color-maps the U plane of the results where the I plane is positive."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        super().__init__(_extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, 1.0)
+
+
+class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
+    """Color-maps the V plane of the results where the I plane is positive."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        super().__init__(_extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, 1.0)
+
+
+class DensePoseOutputsFineSegmentationVisualizer(object):
+    """Color-maps the fine segmentation (part labels) predicted in raw DensePose outputs."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        # labels are scaled so that N_PART_LABELS lands on 255
+        val_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS
+        self.mask_visualizer = MatrixVisualizer(
+            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
+        )
+
+    def visualize(
+        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
+    ) -> Image:
+        """
+        Overlay the predicted part labels of every detection on the image.
+
+        Args:
+            image_bgr: image to draw on
+            dp_output_with_bboxes: `(densepose_output, bboxes_xywh)` pair, or None
+                to return the image untouched
+        Returns:
+            the image returned by the last MatrixVisualizer call, or the input
+            image itself when there is nothing to draw
+        """
+        if dp_output_with_bboxes is None:
+            return image_bgr
+        densepose_output, bboxes_xywh = dp_output_with_bboxes
+        S = densepose_output.S
+        I = densepose_output.I  # noqa
+        U = densepose_output.U
+        V = densepose_output.V
+        N = S.size(0)
+        # all outputs and the boxes must share the same first dimension
+        size_msg = "densepose outputs S {} and {} {} should have equal first dim size"
+        for name, tensor in (("I", I), ("U", U), ("V", V)):
+            assert N == tensor.size(0), size_msg.format(S.size(), name, tensor.size())
+        bbox_msg = "number of bounding boxes {} should be equal to first dim size of outputs {}"
+        assert N == len(bboxes_xywh), bbox_msg.format(len(bboxes_xywh), N)
+        for n in range(N):
+            # label map: argmax over I, zeroed wherever the argmax over S is 0
+            Sn = S[n].argmax(dim=0)
+            In = I[n].argmax(dim=0) * (Sn > 0).long()
+            matrix = In.cpu().numpy().astype(np.uint8)
+            # the mask marks the pixels with a positive label
+            mask = np.zeros(matrix.shape, dtype=np.uint8)
+            mask[matrix > 0] = 1
+            bbox_xywh = bboxes_xywh[n]
+            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
+        return image_bgr
+
+
+class DensePoseOutputsUVisualizer(object):
+    """Color-maps the U coordinates predicted in raw DensePose outputs."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        self.mask_visualizer = MatrixVisualizer(
+            inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
+        )
+
+    def visualize(
+        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
+    ) -> Image:
+        """
+        Overlay the predicted U coordinates of every detection on the image.
+
+        Args:
+            image_bgr: image to draw on
+            dp_output_with_bboxes: `(densepose_output, bboxes_xywh)` pair, or None
+                to return the image untouched
+        Returns:
+            the image produced by the last MatrixVisualizer call (or the input image)
+        """
+        if dp_output_with_bboxes is None:
+            return image_bgr
+        densepose_output, bboxes_xywh = dp_output_with_bboxes
+        assert isinstance(
+            densepose_output, DensePoseOutput
+        ), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
+        S = densepose_output.S
+        I = densepose_output.I  # noqa
+        U = densepose_output.U
+        V = densepose_output.V
+        N = S.size(0)
+        size_msg = "densepose outputs S {} and {} {} should have equal first dim size"
+        for name, tensor in (("I", I), ("U", U), ("V", V)):
+            assert N == tensor.size(0), size_msg.format(S.size(), name, tensor.size())
+        bbox_msg = "number of bounding boxes {} should be equal to first dim size of outputs {}"
+        assert N == len(bboxes_xywh), bbox_msg.format(len(bboxes_xywh), N)
+        for n in range(N):
+            # label map: argmax over I, zeroed wherever the argmax over S is 0
+            Sn = S[n].argmax(dim=0)
+            In = I[n].argmax(dim=0) * (Sn > 0).long()
+            segmentation = In.cpu().numpy().astype(np.uint8)
+            mask = (segmentation > 0).astype(np.uint8)
+            Un = U[n].cpu().numpy().astype(np.float32)
+            Uvis = np.zeros(segmentation.shape, dtype=np.float32)
+            # every pixel takes the U value of the channel selected by its own part label
+            for partId in range(Un.shape[0]):
+                part_pixels = segmentation == partId
+                Uvis[part_pixels] = Un[partId][part_pixels].clip(0, 1) * 255
+            # looked up once per detection, independently of how many part channels exist
+            bbox_xywh = bboxes_xywh[n]
+            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh)
+        return image_bgr
+
+
+class DensePoseOutputsVVisualizer(object):
+    """Color-maps the V coordinates predicted in raw DensePose outputs."""
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        self.mask_visualizer = MatrixVisualizer(
+            inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
+        )
+
+    def visualize(
+        self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]]
+    ) -> Image:
+        """
+        Overlay the predicted V coordinates of every detection on the image.
+
+        Args:
+            image_bgr: image to draw on
+            dp_output_with_bboxes: `(densepose_output, bboxes_xywh)` pair, or None
+                to return the image untouched
+        Returns:
+            the image produced by the last MatrixVisualizer call (or the input image)
+        """
+        if dp_output_with_bboxes is None:
+            return image_bgr
+        densepose_output, bboxes_xywh = dp_output_with_bboxes
+        assert isinstance(
+            densepose_output, DensePoseOutput
+        ), "DensePoseOutput expected, {} encountered".format(type(densepose_output))
+        S = densepose_output.S
+        I = densepose_output.I  # noqa
+        U = densepose_output.U
+        V = densepose_output.V
+        N = S.size(0)
+        size_msg = "densepose outputs S {} and {} {} should have equal first dim size"
+        for name, tensor in (("I", I), ("U", U), ("V", V)):
+            assert N == tensor.size(0), size_msg.format(S.size(), name, tensor.size())
+        bbox_msg = "number of bounding boxes {} should be equal to first dim size of outputs {}"
+        assert N == len(bboxes_xywh), bbox_msg.format(len(bboxes_xywh), N)
+        for n in range(N):
+            # label map: argmax over I, zeroed wherever the argmax over S is 0
+            Sn = S[n].argmax(dim=0)
+            In = I[n].argmax(dim=0) * (Sn > 0).long()
+            segmentation = In.cpu().numpy().astype(np.uint8)
+            mask = (segmentation > 0).astype(np.uint8)
+            Vn = V[n].cpu().numpy().astype(np.float32)
+            Vvis = np.zeros(segmentation.shape, dtype=np.float32)
+            # every pixel takes the V value of the channel selected by its own part label
+            # Vn is a NumPy array: the channel count is shape[0] (`ndarray.size` is an int)
+            for partId in range(Vn.shape[0]):
+                part_pixels = segmentation == partId
+                Vvis[part_pixels] = Vn[partId][part_pixels].clip(0, 1) * 255
+            bbox_xywh = bboxes_xywh[n]
+            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh)
+        return image_bgr
+
+
+class DensePoseDataCoarseSegmentationVisualizer(object):
+    """
+    Visualizer for ground truth segmentation
+    (segmentation values are scaled by 255 / N_BODY_PARTS before color mapping)
+    """
+
+    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7):
+        val_scale = 255.0 / DensePoseDataRelative.N_BODY_PARTS
+        self.mask_visualizer = MatrixVisualizer(
+            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
+        )
+
+    def visualize(
+        self,
+        image_bgr: Image,
+        bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
+    ) -> Image:
+        """Overlay the `segm` of each annotation inside its box; None leaves the image as is."""
+        if bbox_densepose_datas is None:
+            return image_bgr
+        for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
+            matrix = densepose_data.segm.numpy()
+            # pixels with a positive segmentation value are the ones that get blended
+            mask = (matrix > 0).astype(np.uint8)
+            image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
+        return image_bgr
+
+
+class DensePoseDataPointsVisualizer(object):
+    """Draws annotated points, optionally colored by a value computed from the annotation."""
+
+    def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA):
+        self.points_visualizer = PointsVisualizer()
+        self.densepose_data_to_value_fn = densepose_data_to_value_fn
+        self.cmap = cmap
+
+    def visualize(
+        self,
+        image_bgr: Image,
+        bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
+    ) -> Image:
+        """Draw the points of every annotation; None leaves the image as is."""
+        if bbox_densepose_datas is None:
+            return image_bgr
+        for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
+            x0, y0, w, h = bbox_xywh.numpy()
+            # x / y are rescaled from a 0..255 range onto the box
+            x = densepose_data.x.numpy() * w / 255.0 + x0
+            y = densepose_data.y.numpy() * h / 255.0 + y0
+            colors_bgr = None
+            if self.densepose_data_to_value_fn is not None:
+                values = self.densepose_data_to_value_fn(densepose_data)
+                colormapped = cv2.applyColorMap(values, self.cmap)
+                colors_bgr = [[int(c) for c in color.ravel()] for color in colormapped]
+            image_bgr = self.points_visualizer.visualize(image_bgr, zip(x, y), colors_bgr)
+        return image_bgr
+
+
+def _densepose_data_u_for_cmap(densepose_data):
+    """Clip the U values to [0, 1] and rescale them to uint8 values in [0, 255]."""
+    return (np.clip(densepose_data.u.numpy(), 0, 1) * 255.0).astype(np.uint8)
+
+
+def _densepose_data_v_for_cmap(densepose_data):
+    """Clip the V values to [0, 1] and rescale them to uint8 values in [0, 255]."""
+    return (np.clip(densepose_data.v.numpy(), 0, 1) * 255.0).astype(np.uint8)
+
+
+def _densepose_data_i_for_cmap(densepose_data):
+    """Clip the I values to [0, N_PART_LABELS] and rescale them to uint8 values in [0, 255]."""
+    n_labels = DensePoseDataRelative.N_PART_LABELS
+    labels = np.clip(densepose_data.i.numpy(), 0.0, n_labels)
+    # multiply first, then divide, so that n_labels maps to exactly 255.0
+    scaled = labels * 255.0 / n_labels
+    return scaled.astype(np.uint8)
+
+
+class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
+    """Draws annotated points colored by the value from `_densepose_data_u_for_cmap`."""
+
+    def __init__(self):
+        super().__init__(densepose_data_to_value_fn=_densepose_data_u_for_cmap)
+
+
+class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
+    """Draws annotated points colored by the value from `_densepose_data_v_for_cmap`."""
+
+    def __init__(self):
+        super().__init__(densepose_data_to_value_fn=_densepose_data_v_for_cmap)
+
+
+class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
+    """Draws annotated points colored by the value from `_densepose_data_i_for_cmap`."""
+
+    def __init__(self):
+        super().__init__(densepose_data_to_value_fn=_densepose_data_i_for_cmap)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py
@@ -0,0 +1,152 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+from typing import Sequence
+import torch
+
+from detectron2.layers.nms import batched_nms
+from detectron2.structures.instances import Instances
+
+from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
+from densepose.vis.densepose import DensePoseResultsVisualizer
+
+from .base import CompoundVisualizer
+
+Scores = Sequence[float]
+
+
+def extract_scores_from_instances(instances: Instances, select=None):
+    if instances.has("scores"):
+        return instances.scores if select is None else instances.scores[select]
+    return None
+
+
+def extract_boxes_xywh_from_instances(instances: Instances, select=None):
+    if instances.has("pred_boxes"):
+        boxes_xywh = instances.pred_boxes.tensor.clone()
+        boxes_xywh[:, 2] -= boxes_xywh[:, 0]
+        boxes_xywh[:, 3] -= boxes_xywh[:, 1]
+        return boxes_xywh if select is None else boxes_xywh[select]
+    return None
+
+
+def create_extractor(visualizer: object):
+    """
+    Create an extractor for the provided visualizer
+    """
+    if isinstance(visualizer, CompoundVisualizer):
+        extractors = [create_extractor(v) for v in visualizer.visualizers]
+        return CompoundExtractor(extractors)
+    elif isinstance(visualizer, DensePoseResultsVisualizer):
+        return DensePoseResultExtractor()
+    elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
+        return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
+    elif isinstance(visualizer, BoundingBoxVisualizer):
+        return extract_boxes_xywh_from_instances
+    else:
+        logger = logging.getLogger(__name__)
+        logger.error(f"Could not create extractor for {visualizer}")
+        return None
+
+
+class BoundingBoxExtractor(object):
+    """
+    Extracts bounding boxes from instances
+    """
+
+    def __call__(self, instances: Instances):
+        boxes_xywh = extract_boxes_xywh_from_instances(instances)
+        return boxes_xywh
+
+
+class ScoredBoundingBoxExtractor(object):
+    """
+    Extracts bounding boxes from instances
+    """
+
+    def __call__(self, instances: Instances, select=None):
+        scores = extract_scores_from_instances(instances)
+        boxes_xywh = extract_boxes_xywh_from_instances(instances)
+        if (scores is None) or (boxes_xywh is None):
+            return (boxes_xywh, scores)
+        if select is not None:
+            scores = scores[select]
+            boxes_xywh = boxes_xywh[select]
+        return (boxes_xywh, scores)
+
+
+class DensePoseResultExtractor(object):
+    """
+    Extracts DensePose result from instances
+    """
+
+    def __call__(self, instances: Instances, select=None):
+        boxes_xywh = extract_boxes_xywh_from_instances(instances)
+        if instances.has("pred_densepose") and (boxes_xywh is not None):
+            dpout = instances.pred_densepose
+            if select is not None:
+                dpout = dpout[select]
+                boxes_xywh = boxes_xywh[select]
+            return dpout.to_result(boxes_xywh)
+        else:
+            return None
+
+
+class CompoundExtractor(object):
+    """
+    Extracts data for CompoundVisualizer
+    """
+
+    def __init__(self, extractors):
+        self.extractors = extractors
+
+    def __call__(self, instances: Instances, select=None):
+        datas = []
+        for extractor in self.extractors:
+            data = extractor(instances, select)
+            datas.append(data)
+        return datas
+
+
+class NmsFilteredExtractor(object):
+    """
+    Extracts data in the format accepted by NmsFilteredVisualizer
+    """
+
+    def __init__(self, extractor, iou_threshold):
+        self.extractor = extractor
+        self.iou_threshold = iou_threshold
+
+    def __call__(self, instances: Instances, select=None):
+        scores = extract_scores_from_instances(instances)
+        boxes_xywh = extract_boxes_xywh_from_instances(instances)
+        if boxes_xywh is None:
+            return None
+        select_local_idx = batched_nms(
+            boxes_xywh,
+            scores,
+            torch.zeros(len(scores), dtype=torch.int32),
+            iou_threshold=self.iou_threshold,
+        ).squeeze()
+        select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
+        select_local[select_local_idx] = True
+        select = select_local if select is None else (select & select_local)
+        return self.extractor(instances, select=select)
+
+
+class ScoreThresholdedExtractor(object):
+    """
+    Extracts data in the format accepted by ScoreThresholdedVisualizer
+    """
+
+    def __init__(self, extractor, min_score):
+        self.extractor = extractor
+        self.min_score = min_score
+
+    def __call__(self, instances: Instances, select=None):
+        scores = extract_scores_from_instances(instances)
+        if scores is None:
+            return None
+        select_local = scores > self.min_score
+        select = select_local if select is None else (select & select_local)
+        data = self.extractor(instances, select=select)
+        return data
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md
@@ -0,0 +1,7 @@
+
+## Some scripts for developers to use, including:
+
+- `run_instant_tests.sh`: run training for a few iterations.
+- `run_inference_tests.sh`: run inference on a small dataset.
+- `../../dev/linter.sh`: lint the codebase before commit
+- `../../dev/parse_results.sh`: parse results from log file.
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+#
+# Runs train_net.py in evaluation-only mode for each given config file.
+# Without arguments, all *inference_acc_test.yaml configs from
+# ./configs/quick_schedules are used. The output directory is removed
+# after each run.
+
+# NOTE: expanded unquoted below on purpose, so that it splits into
+# the interpreter and the script name
+BIN="python train_net.py"
+OUTPUT="inference_test_output"
+NUM_GPUS=2
+IMS_PER_GPU=2
+IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU ))
+
+CFG_LIST=( "${@:1}" )
+
+if [ ${#CFG_LIST[@]} -eq 0 ]; then
+  CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml )
+fi
+
+echo "========================================================================"
+echo "Configs to run:"
+echo "${CFG_LIST[@]}"
+echo "========================================================================"
+
+for cfg in "${CFG_LIST[@]}"; do
+    echo "========================================================================"
+    echo "Running $cfg ..."
+    echo "========================================================================"
+    $BIN \
+      --eval-only \
+      --num-gpus "$NUM_GPUS" \
+      --config-file "$cfg" \
+      OUTPUT_DIR "$OUTPUT" \
+      SOLVER.IMS_PER_BATCH "$IMS_PER_BATCH"
+    # Quoted, so that the removed path is always exactly the output directory
+    rm -rf "$OUTPUT"
+done
+
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh
@@ -0,0 +1,28 @@
+#!/bin/bash -e
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+#
+# Runs train_net.py for each given config file. Without arguments, all
+# *instant_test.yaml configs from ./configs/quick_schedules are used.
+# The output directory is removed after each run.
+
+# NOTE: expanded unquoted below on purpose, so that it splits into
+# the interpreter and the script name
+BIN="python train_net.py"
+OUTPUT="instant_test_output"
+NUM_GPUS=2
+# Two images per GPU
+SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2))
+
+# Config files are taken from the command line arguments ...
+CFG_LIST=( "${@:1}" )
+# ... and default to the instant test configs if none are given
+if [ ${#CFG_LIST[@]} -eq 0 ]; then
+  CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml )
+fi
+
+echo "========================================================================"
+echo "Configs to run:"
+echo "${CFG_LIST[@]}"
+echo "========================================================================"
+
+for cfg in "${CFG_LIST[@]}"; do
+    echo "========================================================================"
+    echo "Running $cfg ..."
+    echo "========================================================================"
+    $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \
+      SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \
+      OUTPUT_DIR "$OUTPUT"
+    # Clean up, so that the next config starts with a fresh output directory
+    rm -rf "$OUTPUT"
+done
+
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md
@@ -0,0 +1,58 @@
+# Getting Started with DensePose
+
+## Inference with Pre-trained Models
+
+1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml)
+2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save them to disk. For example, to use contour visualization for DensePose, one can run:
+```bash
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png
+```
+Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool.
+
+## Training
+
+First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts:
+<pre>
+datasets/coco/
+  annotations/
+    densepose_{train,minival,valminusminival}2014.json
+    <a href="https://dl.fbaipublicfiles.com/detectron2/densepose/densepose_minival2014_100.json">densepose_minival2014_100.json </a>  (optional, for testing only)
+  {train,val}2014/
+    # image files that are mentioned in the corresponding json
+</pre>
+
+To train a model one can use the [train_net.py](../train_net.py) script.
+This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md).
+For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone
+on 8 GPUs following the s1x schedule, one can run
+```bash
+python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8
+```
+The configs are made for 8-GPU training. To train on 1 GPU, one can apply the
+[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677):
+```bash
+python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
+    SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
+```
+
+## Evaluation
+
+Model testing can be done in the same way as training, except for an additional flag `--eval-only` and
+model location specification through `MODEL.WEIGHTS model.pth` in the command line:
+```bash
+python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
+    --eval-only MODEL.WEIGHTS model.pth
+```
+
+## Tools
+
+We provide tools which allow one to:
+ - easily view DensePose annotated data in a dataset;
+ - perform DensePose inference on a set of images;
+ - visualize DensePose model results;
+
+`query_db` is a tool to print or visualize DensePose data in a dataset.
+Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool
+
+`apply_net` is a tool to print or visualize DensePose results.
+Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md
@@ -0,0 +1,277 @@
+# Model Zoo and Baselines
+
+# Introduction
+
+We provide baselines trained with Detectron2 DensePose. The corresponding
+configuration files can be found in the [configs](../configs) directory.
+All models were trained on COCO `train2014` + `valminusminival2014` and
+evaluated on COCO `minival2014`. For the details on common settings in which
+baselines were trained, please check [Detectron 2 Model Zoo](../../../MODEL_ZOO.md).
+
+## License
+
+All models available for download through this document are licensed under the
+[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/)
+
+## COCO DensePose Baselines with DensePose-RCNN
+
+### Legacy Models
+
+Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf)
+
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">dp. AP<br/>GPS</th>
+<th valign="bottom">dp. AP<br/>GPSm</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+<!-- ROW: densepose_rcnn_R_50_FPN_s1x_legacy -->
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml">R_50_FPN_s1x_legacy</a></td>
+ <td align="center">s1x</td>
+ <td align="center">0.307</td>
+ <td align="center">0.051</td>
+ <td align="center">3.2</td>
+ <td align="center">58.1</td>
+ <td align="center">52.1</td>
+ <td align="center">54.9</td>
+ <td align="center">164832157</td>
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/model_final_d366fa.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x_legacy/164832157/metrics.json">metrics</a></td>
+ </tr>
+ <!-- ROW: densepose_rcnn_R_101_FPN_s1x_legacy -->
+  <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml">R_101_FPN_s1x_legacy</a></td>
+  <td align="center">s1x</td>
+  <td align="center">0.390</td>
+  <td align="center">0.063</td>
+  <td align="center">4.3</td>
+  <td align="center">59.5</td>
+  <td align="center">53.2</td>
+  <td align="center">56.1</td>
+  <td align="center">164832182</td>
+  <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/model_final_10af0e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x_legacy/164832182/metrics.json">metrics</a></td>
+  </tr>
+</tbody></table>
+
+### Improved Baselines, Original Fully Convolutional Head
+
+These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
+
+<table><tbody>
+  <!-- START TABLE -->
+  <!-- TABLE HEADER -->
+  <th valign="bottom">Name</th>
+  <th valign="bottom">lr<br/>sched</th>
+  <th valign="bottom">train<br/>time<br/>(s/iter)</th>
+  <th valign="bottom">inference<br/>time<br/>(s/im)</th>
+  <th valign="bottom">train<br/>mem<br/>(GB)</th>
+  <th valign="bottom">box<br/>AP</th>
+  <th valign="bottom">dp. AP<br/>GPS</th>
+  <th valign="bottom">dp. AP<br/>GPSm</th>
+  <th valign="bottom">model id</th>
+  <th valign="bottom">download</th>
+  <!-- TABLE BODY -->
+  <!-- ROW: densepose_rcnn_R_50_FPN_s1x -->
+   <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_s1x.yaml">R_50_FPN_s1x</a></td>
+   <td align="center">s1x</td>
+   <td align="center">0.359</td>
+   <td align="center">0.066</td>
+   <td align="center">4.5</td>
+   <td align="center">61.2</td>
+   <td align="center">63.7</td>
+   <td align="center">65.3</td>
+   <td align="center">165712039</td>
+   <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/metrics.json">metrics</a></td>
+   </tr>
+   <!-- ROW: densepose_rcnn_R_101_FPN_s1x -->
+    <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_s1x.yaml">R_101_FPN_s1x</a></td>
+    <td align="center">s1x</td>
+    <td align="center">0.428</td>
+    <td align="center">0.079</td>
+    <td align="center">5.8</td>
+    <td align="center">62.3</td>
+    <td align="center">64.5</td>
+    <td align="center">66.4</td>
+    <td align="center">165712084</td>
+    <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/model_final_c6ab63.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_s1x/165712084/metrics.json">metrics</a></td>
+    </tr>
+    </tbody></table>
+
+### Improved Baselines, DeepLabV3 Head
+
+These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
+
+<table><tbody>
+    <!-- START TABLE -->
+    <!-- TABLE HEADER -->
+    <th valign="bottom">Name</th>
+    <th valign="bottom">lr<br/>sched</th>
+    <th valign="bottom">train<br/>time<br/>(s/iter)</th>
+    <th valign="bottom">inference<br/>time<br/>(s/im)</th>
+    <th valign="bottom">train<br/>mem<br/>(GB)</th>
+    <th valign="bottom">box<br/>AP</th>
+    <th valign="bottom">dp. AP<br/>GPS</th>
+    <th valign="bottom">dp. AP<br/>GPSm</th>
+    <th valign="bottom">model id</th>
+    <th valign="bottom">download</th>
+    <!-- TABLE BODY -->
+    <!-- ROW: densepose_rcnn_R_50_FPN_DL_s1x -->
+     <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml">R_50_FPN_DL_s1x</a></td>
+     <td align="center">s1x</td>
+     <td align="center">0.392</td>
+     <td align="center">0.070</td>
+     <td align="center">6.7</td>
+     <td align="center">61.1</td>
+     <td align="center">65.6</td>
+     <td align="center">66.8</td>
+     <td align="center">165712097</td>
+     <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/model_final_0ed407.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_s1x/165712097/metrics.json">metrics</a></td>
+     </tr>
+     <!-- ROW: densepose_rcnn_R_101_FPN_DL_s1x -->
+      <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml">R_101_FPN_DL_s1x</a></td>
+      <td align="center">s1x</td>
+      <td align="center">0.478</td>
+      <td align="center">0.083</td>
+      <td align="center">7.0</td>
+      <td align="center">62.3</td>
+      <td align="center">66.3</td>
+      <td align="center">67.7</td>
+      <td align="center">165712116</td>
+      <td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/model_final_844d15.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_s1x/165712116/metrics.json">metrics</a></td>
+      </tr>
+</tbody></table>
+
+### Baselines with Confidence Estimation
+
+These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
+
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">dp. AP<br/>GPS</th>
+<th valign="bottom">dp. AP<br/>GPSm</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY --> 
+<!-- ROW: densepose_rcnn_R_50_FPN_WC1_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml">R_50_FPN_WC1_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.353</td>
+<td align="center">0.064</td>
+<td align="center">4.6</td>
+<td align="center">60.5</td>
+<td align="center">64.2</td>
+<td align="center">65.6</td>
+<td align="center">173862049</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/model_final_289019.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC1_s1x/173862049/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_50_FPN_WC2_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml">R_50_FPN_WC2_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.364</td>
+<td align="center">0.066</td>
+<td align="center">4.8</td>
+<td align="center">60.7</td>
+<td align="center">64.2</td>
+<td align="center">65.7</td>
+<td align="center">173861455</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/model_final_3abe14.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_WC2_s1x/173861455/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC1_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml">R_50_FPN_DL_WC1_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.397</td>
+<td align="center">0.068</td>
+<td align="center">6.7</td>
+<td align="center">61.1</td>
+<td align="center">65.8</td>
+<td align="center">67.1</td>
+<td align="center">173067973</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/model_final_b1e525.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC1_s1x/173067973/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_50_FPN_DL_WC2_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml">R_50_FPN_DL_WC2_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.410</td>
+<td align="center">0.070</td>
+<td align="center">6.8</td>
+<td align="center">60.8</td>
+<td align="center">65.6</td>
+<td align="center">66.7</td>
+<td align="center">173859335</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/model_final_60fed4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_DL_WC2_s1x/173859335/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_101_FPN_WC1_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml">R_101_FPN_WC1_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.435</td>
+<td align="center">0.076</td>
+<td align="center">5.7</td>
+<td align="center">62.5</td>
+<td align="center">64.9</td>
+<td align="center">66.5</td>
+<td align="center">171402969</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/model_final_9e47f0.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC1_s1x/171402969/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_101_FPN_WC2_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml">R_101_FPN_WC2_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.450</td>
+<td align="center">0.078</td>
+<td align="center">5.7</td>
+<td align="center">62.3</td>
+<td align="center">64.8</td>
+<td align="center">66.6</td>
+<td align="center">173860702</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/model_final_5ea023.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_WC2_s1x/173860702/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC1_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml">R_101_FPN_DL_WC1_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.479</td>
+<td align="center">0.081</td>
+<td align="center">7.9</td>
+<td align="center">62.0</td>
+<td align="center">66.2</td>
+<td align="center">67.4</td>
+<td align="center">173858525</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/model_final_f359f3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC1_s1x/173858525/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: densepose_rcnn_R_101_FPN_DL_WC2_s1x --> 
+ <tr><td align="left"><a href="../configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml">R_101_FPN_DL_WC2_s1x</a></td>
+<td align="center">s1x</td>
+<td align="center">0.491</td>
+<td align="center">0.082</td>
+<td align="center">7.6</td>
+<td align="center">61.7</td>
+<td align="center">65.9</td>
+<td align="center">67.3</td>
+<td align="center">173294801</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/model_final_6e1ed1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_101_FPN_DL_WC2_s1x/173294801/metrics.json">metrics</a></td>
+</tr>
+</tbody></table>
+
+## Old Baselines
+
+It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose).
+Below are evaluation metrics for the baselines recomputed in the current framework:
+
+| Model | bbox AP | AP  |  AP50 | AP75  | APm  |APl |
+|-----|-----|-----|---    |---    |---   |--- |
+| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 |
+| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 |
+
+Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md),
+which is due to small incompatibilities between the frameworks.
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md
@@ -0,0 +1,130 @@
+# Apply Net
+
+`apply_net` is a tool to print or visualize DensePose results on a set of images.
+It has two modes: `dump` to save DensePose model results to a pickle file
+and `show` to visualize them on images.
+
+## Dump Mode
+
+The general command form is:
+```bash
+python apply_net.py dump [-h] [-v] [--output <dump_file>] <config> <model> <input>
+```
+
+There are three mandatory arguments:
+ - `<config>`, configuration file for a given model;
+ - `<model>`, model file with trained parameters
+ - `<input>`, input image file name, pattern or folder
+
+One can additionally provide `--output` argument to define the output file name,
+which defaults to `output.pkl`.
+
+
+Examples:
+
+1. Dump results of a DensePose model with ResNet-50 FPN backbone for images
+   in a folder `images` to file `dump.pkl`:
+```bash
+python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v
+```
+
+2. Dump results of a DensePose model with ResNet-50 FPN backbone for images
+   with file name matching a pattern `image*.jpg` to file `results.pkl`:
+```bash
+python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v
+```
+
+If you want to load the pickle file generated by the above command:
+```
+import pickle
+import sys
+
+# make sure DensePose is in your PYTHONPATH, or use the following line to add it:
+sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/")
+
+f = open('/your_result_path/results.pkl', 'rb')
+data = pickle.load(f)
+```
+
+The file `results.pkl` contains the list of results per image, for each image the result is a dictionary:
+```
+data: [{'file_name': '/your_path/image1.jpg',
+        'scores': tensor([0.9884]),
+        'pred_boxes_XYXY': tensor([[ 69.6114,   0.0000, 706.9797, 706.0000]]),
+        'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f791b312470>},
+       {'file_name': '/your_path/image2.jpg',
+        'scores': tensor([0.9999, 0.5373, 0.3991]),
+        'pred_boxes_XYXY': tensor([[ 59.5734,   7.7535, 579.9311, 932.3619],
+                                   [612.9418, 686.1254, 612.9999, 704.6053],
+                                   [164.5081, 407.4034, 598.3944, 920.4266]]),
+        'pred_densepose': <densepose.structures.DensePoseResult object at 0x7f7071229be0>}]
+```
+
+We can use the following code to parse the outputs of the first
+detected instance on the first image.
+```
+img_id, instance_id = 0, 0  # Look at the first image and the first detected instance
+bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id]
+result_encoded = data[img_id]['pred_densepose'].results[instance_id]
+iuv_arr = DensePoseResult.decode_png_data(*result_encoded)
+```
+The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box.
+
+The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box.
+- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on.
+- `iuv_arr[1,:,:]`: The U-coordinate value of image points.
+- `iuv_arr[2,:,:]`: The V-coordinate value of image points.
+
+
+## Visualization Mode
+
+The general command form is:
+```bash
+python apply_net.py show [-h] [-v] [--min_score <score>] [--nms_thresh <threshold>] [--output <image_file>] <config> <model> <input> <visualizations>
+```
+
+There are four mandatory arguments:
+ - `<config>`, configuration file for a given model;
+ - `<model>`, model file with trained parameters
+ - `<input>`, input image file name, pattern or folder
+ - `<visualizations>`, visualizations specifier; currently available visualizations are:
+   * `bbox` - bounding boxes of detected persons;
+   * `dp_segm` - segmentation masks for detected persons;
+   * `dp_u` - each body part is colored according to the estimated values of the
+     U coordinate in part parameterization;
+   * `dp_v` - each body part is colored according to the estimated values of the
+     V coordinate in part parameterization;
+   * `dp_contour` - plots contours with color-coded U and V coordinates
+
+
+One can additionally provide the following optional arguments:
+ - `--min_score` to only show detections with scores that are not lower than the provided value
+ - `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold
+ - `--output` to define visualization file name template, which defaults to `output.png`.
+   To distinguish output file names for different images, the tool appends a 1-based entry index,
+   e.g. output.0001.png, output.0002.png, etc...
+
+
+The following examples show how to output results of a DensePose model
+with ResNet-50 FPN backbone using different visualizations for image `image.jpg`:
+
+1. Show bounding box and segmentation:
+```bash
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v
+```
+![Bounding Box + Segmentation Visualization](images/res_bbox_dp_segm.jpg)
+
+2. Show bounding box and estimated U coordinates for body parts:
+```bash
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v
+```
+![Bounding Box + U Coordinate Visualization](images/res_bbox_dp_u.jpg)
+
+3. Show bounding box and estimated V coordinates for body parts:
+```bash
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v
+```
+![Bounding Box + V Coordinate Visualization](images/res_bbox_dp_v.jpg)
+
+4. Show bounding box and estimated U and V coordinates via contour plots:
+```bash
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v
+```
+![Bounding Box + Contour Visualization](images/res_bbox_dp_contour.jpg)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md
@@ -0,0 +1,105 @@
+
+# Query Dataset
+
+`query_db` is a tool to print or visualize DensePose data from a dataset.
+It has two modes: `print` and `show` to output dataset entries to standard
+output or to visualize them on images.
+
+## Print Mode
+
+The general command form is:
+```bash
+python query_db.py print [-h] [-v] [--max-entries N] <dataset> <selector>
+```
+
+There are two mandatory arguments:
+ - `<dataset>`, DensePose dataset specification, from which to select
+   the entries (e.g. `densepose_coco_2014_train`).
+ - `<selector>`, dataset entry selector which can be a single specification,
+   or a comma-separated list of specifications of the form
+   `field[:type]=value` for exact match with the value
+   or `field[:type]=min-max` for a range of values
+
+One can additionally limit the maximum number of entries to output
+by providing `--max-entries` argument.
+
+Examples:
+
+1. Output at most the first 10 entries from the `densepose_coco_2014_train` dataset:
+```bash
+python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v
+```
+
+2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`: 
+```bash
+python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v
+```
+
+3. Output all entries with `image_id` between 36 and 156:
+```bash
+python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v
+```
+
+## Visualization Mode
+
+The general command form is:
+```bash
+python query_db.py show [-h] [-v] [--max-entries N] [--output <image_file>] <dataset> <selector> <visualizations>
+```
+
+There are three mandatory arguments:
+ - `<dataset>`, DensePose dataset specification, from which to select
+   the entries (e.g. `densepose_coco_2014_train`).
+ - `<selector>`, dataset entry selector which can be a single specification,
+   or a comma-separated list of specifications of the form
+   `field[:type]=value` for exact match with the value
+   or `field[:type]=min-max` for a range of values
+ - `<visualizations>`, visualizations specifier; currently available visualizations are:
+   * `bbox` - bounding boxes of annotated persons;
+   * `dp_i` - annotated points colored according to the containing part;
+   * `dp_pts` - annotated points in green color;
+   * `dp_segm` - segmentation masks for annotated persons;
+   * `dp_u` - annotated points colored according to their U coordinate in part parameterization;
+   * `dp_v` - annotated points colored according to their V coordinate in part parameterization;
+
+One can additionally provide the following optional arguments:
+ - `--max-entries` to limit the maximum number of entries to visualize
+ - `--output` to provide visualization file name template, which defaults
+   to `output.png`. To distinguish file names for different dataset
+   entries, the tool appends 1-based entry index to the output file name,
+   e.g. output.0001.png, output.0002.png, etc.
+
+The following examples show how to output different visualizations for image with `id = 322`
+from `densepose_coco_2014_train` dataset:
+
+1. Show bounding box and segmentation:
+```bash
+python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v
+```
+![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg)
+
+2. Show bounding box and points colored according to the containing part:
+```bash
+python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v
+```
+![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg)
+
+3. Show bounding box and annotated points in green color:
+```bash
+python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_pts -v
+```
+![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg)
+
+4. Show bounding box and annotated points colored according to their U coordinate in part parameterization:
+```bash
+python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v
+```
+![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg)
+
+5. Show bounding box and annotated points colored according to their V coordinate in part parameterization:
+```bash
+python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v
+```
+![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg)
+
+
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import argparse
+import logging
+import os
+import sys
+from timeit import default_timer as timer
+from typing import Any, ClassVar, Dict, List
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2.data.catalog import DatasetCatalog
+from detectron2.utils.logger import setup_logger
+
+from densepose.data.structures import DensePoseDataRelative
+from densepose.utils.dbhelper import EntrySelector
+from densepose.utils.logger import verbosity_to_level
+from densepose.vis.base import CompoundVisualizer
+from densepose.vis.bounding_box import BoundingBoxVisualizer
+from densepose.vis.densepose import (
+    DensePoseDataCoarseSegmentationVisualizer,
+    DensePoseDataPointsIVisualizer,
+    DensePoseDataPointsUVisualizer,
+    DensePoseDataPointsVisualizer,
+    DensePoseDataPointsVVisualizer,
+)
+
+# Description passed to argparse for the top-level parser
+DOC = """Query DB - a tool to print / visualize data from a database
+"""
+
+# Name under which this tool's logger is registered
+LOGGER_NAME = "query_db"
+
+# Module-level logger; main() re-binds it to the logger returned by setup_logger
+logger = logging.getLogger(LOGGER_NAME)
+
+# Maps an action's COMMAND string to the action class; filled by register_action
+_ACTION_REGISTRY: Dict[str, "Action"] = {}
+
+
+class Action(object):
+    """
+    Base class for query_db actions; contributes the options shared by all actions
+    """
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        """
+        Register the counting `-v` / `--verbosity` option on the given parser
+        """
+        verbosity_help = "Verbose mode. Multiple -v options increase the verbosity."
+        parser.add_argument("-v", "--verbosity", action="count", help=verbosity_help)
+
+
+def register_action(cls: type):
+    """
+    Class decorator that records an action class in the action registry
+    under its `COMMAND` name and returns the class unchanged
+    """
+    # item assignment mutates the module-level dict in place, no `global` required
+    command = cls.COMMAND
+    _ACTION_REGISTRY[command] = cls
+    return cls
+
+
+class EntrywiseAction(Action):
+    """
+    Base class for actions that are executed on every dataset entry accepted
+    by the entry selector. Subclasses provide `execute_on_entry` and may
+    override `create_context`.
+    """
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        """
+        Register the `<dataset>` and `<selector>` positional arguments and
+        the optional `--max-entries` limit, on top of the base action options
+        """
+        super(EntrywiseAction, cls).add_arguments(parser)
+        parser.add_argument(
+            "dataset", metavar="<dataset>", help="Dataset name (e.g. densepose_coco_2014_train)"
+        )
+        parser.add_argument(
+            "selector",
+            metavar="<selector>",
+            help="Dataset entry selector in the form field1[:type]=value1[,"
+            "field2[:type]=value_min-value_max...] which selects all "
+            "entries from the dataset that satisfy the constraints",
+        )
+        parser.add_argument(
+            "--max-entries", metavar="N", help="Maximum number of entries to process", type=int
+        )
+
+    @classmethod
+    def execute(cls: type, args: argparse.Namespace):
+        """
+        Load the dataset, then call `execute_on_entry` for each entry accepted
+        by the selector, stopping once `args.max_entries` entries have been
+        processed (no limit if it is None)
+        """
+        dataset = setup_dataset(args.dataset)
+        entry_selector = EntrySelector.from_string(args.selector)
+        context = cls.create_context(args)
+        max_entries = args.max_entries
+        n_processed = 0
+        for entry in dataset:
+            # the limit counts entries that were actually output / visualized,
+            # not entries that were merely scanned and rejected by the selector
+            if max_entries is not None and n_processed >= max_entries:
+                break
+            if not entry_selector(entry):
+                continue
+            cls.execute_on_entry(entry, context)
+            n_processed += 1
+
+    @classmethod
+    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+        """
+        Create the mutable context passed to every `execute_on_entry` call;
+        empty by default
+        """
+        context = {}
+        return context
+
+
+@register_action
+class PrintAction(EntrywiseAction):
+    """
+    Print action that outputs selected entries to stdout
+    """
+
+    COMMAND: ClassVar[str] = "print"
+
+    @classmethod
+    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+        """
+        Create the `print` sub-command parser and bind it to `execute`
+        """
+        print_parser = subparsers.add_parser(
+            cls.COMMAND, help="Output selected entries to stdout. "
+        )
+        cls.add_arguments(print_parser)
+        print_parser.set_defaults(func=cls.execute)
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        """
+        The print action needs no options beyond the entrywise ones
+        """
+        super(PrintAction, cls).add_arguments(parser)
+
+    @classmethod
+    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
+        """
+        Pretty-print a single dataset entry to stdout; the context is not used
+        """
+        import pprint
+
+        pprint.PrettyPrinter(indent=2, width=200, compact=True).pprint(entry)
+
+
+@register_action
+class ShowAction(EntrywiseAction):
+    """
+    Show action that visualizes selected entries on an image
+    """
+
+    COMMAND: ClassVar[str] = "show"
+    # Visualizer instances keyed by the names accepted in <visualizations>
+    VISUALIZERS: ClassVar[Dict[str, object]] = {
+        "dp_segm": DensePoseDataCoarseSegmentationVisualizer(),
+        "dp_i": DensePoseDataPointsIVisualizer(),
+        "dp_u": DensePoseDataPointsUVisualizer(),
+        "dp_v": DensePoseDataPointsVVisualizer(),
+        "dp_pts": DensePoseDataPointsVisualizer(),
+        "bbox": BoundingBoxVisualizer(),
+    }
+
+    @classmethod
+    def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+        """
+        Create the `show` sub-command parser and bind it to `execute`
+        """
+        parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
+        cls.add_arguments(parser)
+        parser.set_defaults(func=cls.execute)
+
+    @classmethod
+    def add_arguments(cls: type, parser: argparse.ArgumentParser):
+        """
+        Register the `<visualizations>` positional argument and the `--output`
+        file name template, on top of the entrywise options
+        """
+        super(ShowAction, cls).add_arguments(parser)
+        parser.add_argument(
+            "visualizations",
+            metavar="<visualizations>",
+            help="Comma separated list of visualizations, possible values: "
+            "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
+        )
+        parser.add_argument(
+            "--output",
+            metavar="<image_file>",
+            default="output.png",
+            help="File name to save output to",
+        )
+
+    @classmethod
+    def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
+        """
+        Render the requested visualizations for one dataset entry and save
+        the result to an indexed output file.
+
+        The image is loaded as grayscale and replicated to 3 channels before
+        the visualizations are drawn on top of it. The context's `entry_idx`
+        counter is incremented after the file has been written.
+
+        Raises:
+            OSError: if the entry's image file cannot be read
+        """
+        import cv2
+        import numpy as np
+
+        image_fpath = PathManager.get_local_path(entry["file_name"])
+        image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE)
+        if image is None:
+            # cv2.imread reports a missing / unreadable file by returning None
+            # rather than raising; fail with a message that names the file
+            raise OSError(f"Could not read image {image_fpath}")
+        image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
+        datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry)
+        visualizer = context["visualizer"]
+        image_vis = visualizer.visualize(image, datas)
+        # output file names use a 1-based index
+        entry_idx = context["entry_idx"] + 1
+        out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
+        cv2.imwrite(out_fname, image_vis)
+        logger.info(f"Output saved to {out_fname}")
+        context["entry_idx"] = entry_idx
+
+    @classmethod
+    def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
+        """
+        Insert the zero-padded (4 digits) entry index before the extension,
+        e.g. `output.png` with index 1 gives `output.0001.png`
+        """
+        base, ext = os.path.splitext(fname_base)
+        return base + ".{0:04d}".format(entry_idx) + ext
+
+    @classmethod
+    def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+        """
+        Build the context for `execute_on_entry`: the requested visualization
+        names, a compound visualizer over them, the output file name template
+        and the running entry counter.
+
+        Raises:
+            KeyError: if a requested visualization is not in `VISUALIZERS`
+        """
+        vis_specs = args.visualizations.split(",")
+        visualizers = [cls.VISUALIZERS[vis_spec] for vis_spec in vis_specs]
+        context = {
+            "vis_specs": vis_specs,
+            "visualizer": CompoundVisualizer(visualizers),
+            "out_fname": args.output,
+            "entry_idx": 0,
+        }
+        return context
+
+    @classmethod
+    def _extract_data_for_visualizers_from_entry(
+        cls: type, vis_specs: List[str], entry: Dict[str, Any]
+    ):
+        """
+        Collect the data each requested visualizer gets for one entry.
+
+        Annotations rejected by `DensePoseDataRelative.validate_annotation`
+        are skipped. The returned list has one item per visualization name:
+        the list of boxes for `bbox`, and a `(boxes, densepose data)` pair
+        for every other visualization.
+        """
+        dp_list = []
+        bbox_list = []
+        for annotation in entry["annotations"]:
+            is_valid, _ = DensePoseDataRelative.validate_annotation(annotation)
+            if not is_valid:
+                continue
+            bbox = torch.as_tensor(annotation["bbox"])
+            bbox_list.append(bbox)
+            dp_data = DensePoseDataRelative(annotation)
+            dp_list.append(dp_data)
+        datas = []
+        for vis_spec in vis_specs:
+            datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list))
+        return datas
+
+
+def setup_dataset(dataset_name):
+    """
+    Fetch the named dataset from the dataset catalog, logging how long the load took
+    """
+    logger.info("Loading dataset {}".format(dataset_name))
+    t_begin = timer()
+    dataset = DatasetCatalog.get(dataset_name)
+    elapsed = timer() - t_begin
+    logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, elapsed))
+    return dataset
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+    """
+    Build the top-level parser with one sub-command per registered action;
+    when no sub-command is given, the default `func` prints the help message
+    """
+
+    def _wide_help_formatter(prog):
+        return argparse.HelpFormatter(prog, max_help_position=120)
+
+    parser = argparse.ArgumentParser(description=DOC, formatter_class=_wide_help_formatter)
+    parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
+    subparsers = parser.add_subparsers(title="Actions")
+    for action in _ACTION_REGISTRY.values():
+        action.add_parser(subparsers)
+    return parser
+
+
+def main():
+    """
+    Parse the command line, configure the tool logger according to the
+    requested verbosity and run the selected action
+    """
+    global logger
+    args = create_argument_parser().parse_args()
+    # `verbosity` is only defined by the sub-command parsers
+    verbosity = getattr(args, "verbosity", None)
+    logger = setup_logger(name=LOGGER_NAME)
+    logger.setLevel(verbosity_to_level(verbosity))
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py
@@ -0,0 +1,110 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+import os
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.engine import default_setup
+from detectron2.modeling import build_model
+
+from densepose import add_dataset_category_config, add_densepose_config
+
+# Directory with configuration files, joined to the parent of this file's directory
+_BASE_CONFIG_DIR = "configs"
+# Sub-directories of the base configuration directory
+_EVOLUTION_CONFIG_SUB_DIR = "evolution"
+_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules"
+# Files whose names start with this prefix are skipped when collecting configurations
+_BASE_CONFIG_FILE_PREFIX = "Base-"
+# Only files with this extension are collected
+_CONFIG_FILE_EXT = ".yaml"
+
+
+def _get_base_config_dir():
+    """
+    Return the configuration directory located next to this tests directory
+    """
+    tests_dir = os.path.dirname(os.path.realpath(__file__))
+    return os.path.join(tests_dir, "..", _BASE_CONFIG_DIR)
+
+
+def _get_evolution_config_dir():
+    """
+    Return the directory that holds the evolution configurations
+    """
+    base_dir = _get_base_config_dir()
+    return os.path.join(base_dir, _EVOLUTION_CONFIG_SUB_DIR)
+
+
+def _get_quick_schedules_config_dir():
+    """
+    Return the directory that holds the quick schedules configurations
+    """
+    base_dir = _get_base_config_dir()
+    return os.path.join(base_dir, _QUICK_SCHEDULES_CONFIG_SUB_DIR)
+
+
+def _collect_config_files(config_dir):
+    """
+    Collect the configuration files located directly in the specified directory:
+    regular files with the configuration extension whose names do not start with
+    the base configuration prefix. Paths are returned relative to the base
+    configuration directory.
+    """
+    base_dir = _get_base_config_dir()
+    config_files = []
+    for name in os.listdir(config_dir):
+        full_path = os.path.join(config_dir, name)
+        is_config = (
+            os.path.isfile(full_path)
+            and os.path.splitext(name)[1] == _CONFIG_FILE_EXT
+            and not name.startswith(_BASE_CONFIG_FILE_PREFIX)
+        )
+        if is_config:
+            config_files.append(os.path.relpath(full_path, base_dir))
+    return config_files
+
+
+def get_config_files():
+    """
+    List the configuration files found directly in the base configuration
+    directory (paths relative to that directory)
+    """
+    config_dir = _get_base_config_dir()
+    return _collect_config_files(config_dir)
+
+
+def get_evolution_config_files():
+    """
+    List the evolution configuration files (paths relative to the base
+    configuration directory)
+    """
+    config_dir = _get_evolution_config_dir()
+    return _collect_config_files(config_dir)
+
+
+def get_quick_schedules_config_files():
+    """
+    List the quick schedules configuration files (paths relative to the base
+    configuration directory)
+    """
+    config_dir = _get_quick_schedules_config_dir()
+    return _collect_config_files(config_dir)
+
+
+def _get_model_config(config_file):
+    """
+    Load and return the configuration from the specified file (relative to the base configuration
+    directory). When CUDA is not available, the model device is switched to CPU.
+    """
+    cfg = get_cfg()
+    add_dataset_category_config(cfg)
+    add_densepose_config(cfg)
+    path = os.path.join(_get_base_config_dir(), config_file)
+    cfg.merge_from_file(path)
+    if not torch.cuda.is_available():
+        # the device option lives under the MODEL node; assigning `cfg.MODEL_DEVICE`
+        # would only create an unrelated top-level key and leave the device unchanged
+        cfg.MODEL.DEVICE = "cpu"
+    return cfg
+
+
+def get_model(config_file):
+    """
+    Build the model described by the specified configuration file (relative to
+    the base configuration directory)
+    """
+    model_cfg = _get_model_config(config_file)
+    model = build_model(model_cfg)
+    return model
+
+
+def setup(config_file):
+    """
+    Load the configuration from the specified file (relative to the base configuration
+    directory), freeze it and run the default setup with an empty arguments object
+    """
+    frozen_cfg = _get_model_config(config_file)
+    frozen_cfg.freeze()
+    no_args = {}
+    default_setup(frozen_cfg, no_args)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py
@@ -0,0 +1,43 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+import unittest
+import torch
+
+from detectron2.structures import BitMasks, Boxes, Instances
+
+from .common import get_model
+
+
+# TODO(plabatut): Modularize detectron2 tests and re-use
+def make_model_inputs(image, instances=None):
+    """
+    Pack an image, and optionally its instances, into a model input dictionary
+    """
+    model_inputs = {"image": image}
+    if instances is not None:
+        model_inputs["instances"] = instances
+    return model_inputs
+
+
+def make_empty_instances(h, w):
+    """
+    Create an `Instances` object for an image of size (h, w) that holds
+    zero ground truth boxes, classes and masks
+    """
+    empty = Instances((h, w))
+    no_boxes = torch.rand(0, 4)
+    empty.gt_boxes = Boxes(no_boxes)
+    empty.gt_classes = torch.tensor([], dtype=torch.int64)
+    no_masks = torch.rand(0, h, w)
+    empty.gt_masks = BitMasks(no_masks)
+    return empty
+
+
+class ModelE2ETest(unittest.TestCase):
+    """
+    Base class for end-to-end model tests; subclasses set `CONFIG_PATH`
+    to the configuration file the model is built from
+    """
+
+    CONFIG_PATH = ""
+
+    def setUp(self):
+        config_path = self.CONFIG_PATH
+        self.model = get_model(config_path)
+
+    def _test_eval(self, sizes):
+        """
+        Run the model in eval mode on one random 3-channel image per given size
+        """
+        inputs = []
+        for size in sizes:
+            random_image = torch.rand(3, size[0], size[1])
+            inputs.append(make_model_inputs(random_image))
+        self.model.eval()
+        self.model(inputs)
+
+
+class DensePoseRCNNE2ETest(ModelE2ETest):
+    """
+    End-to-end evaluation test for the model built from `CONFIG_PATH`
+    """
+
+    CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml"
+
+    def test_empty_data(self):
+        # two random images of slightly different sizes, passed in one batch
+        image_sizes = [(200, 250), (200, 249)]
+        self._test_eval(image_sizes)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py
@@ -0,0 +1,30 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+import unittest
+
+from .common import (
+    get_config_files,
+    get_evolution_config_files,
+    get_quick_schedules_config_files,
+    setup,
+)
+
+
+class TestSetup(unittest.TestCase):
+    """
+    Checks that setup runs for every collected configuration file
+    """
+
+    def _test_setup(self, config_file):
+        setup(config_file)
+
+    def _test_setup_all(self, config_files):
+        # the first failing configuration aborts the enclosing test
+        for config_file in config_files:
+            self._test_setup(config_file)
+
+    def test_setup_configs(self):
+        self._test_setup_all(get_config_files())
+
+    def test_setup_evolution_configs(self):
+        self._test_setup_all(get_evolution_config_files())
+
+    def test_setup_quick_schedules_configs(self):
+        self._test_setup_all(get_quick_schedules_config_files())
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py
@@ -0,0 +1,25 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+import unittest
+
+from densepose.data.structures import normalized_coords_transform
+
+
+class TestStructures(unittest.TestCase):
+    def test_normalized_coords_transform(self):
+        """
+        The four corners of the bounding box must map to the corners of [-1, 1] x [-1, 1]
+        """
+        bbox = (32, 24, 288, 216)
+        x0, y0, w, h = bbox
+        xmin, ymin = x0, y0
+        xmax, ymax = x0 + w, y0 + h
+        f = normalized_coords_transform(*bbox)
+        # (corner, point in image coordinates, expected normalized point)
+        corner_cases = [
+            ("top-left", (xmin, ymin), (-1, -1)),
+            ("top-right", (xmax, ymin), (1, -1)),
+            ("bottom-left", (xmin, ymax), (-1, 1)),
+            ("bottom-right", (xmax, ymax), (1, 1)),
+        ]
+        for _corner, point, expected_p in corner_cases:
+            actual_p = f(point)
+            self.assertEqual(expected_p, actual_p)
--- a/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py
+++ b/vton-api/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+"""
+DensePose Training Script.
+
+This script is similar to the training script in detectron2/tools.
+
+It is an example of how a user might use detectron2 for a new project.
+"""
+
+import logging
+import os
+from collections import OrderedDict
+from fvcore.common.file_io import PathManager
+
+import detectron2.utils.comm as comm
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode, get_cfg
+from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
+from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results
+from detectron2.modeling import DatasetMapperTTA
+from detectron2.utils.logger import setup_logger
+
+from densepose import (
+    DensePoseCOCOEvaluator,
+    DensePoseGeneralizedRCNNWithTTA,
+    add_dataset_category_config,
+    add_densepose_config,
+    load_from_cfg,
+)
+from densepose.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader
+
+
+class Trainer(DefaultTrainer):
+    """
+    Trainer that plugs the DensePose dataset mapper, data loaders and evaluators
+    into the default training loop
+    """
+
+    @classmethod
+    def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None):
+        """
+        Create the evaluators for a dataset: a COCO evaluator, plus a DensePose
+        COCO evaluator when `cfg.MODEL.DENSEPOSE_ON` is set. Results go to
+        `<cfg.OUTPUT_DIR>/inference` unless an output folder is given.
+        """
+        if output_folder is None:
+            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+        coco_evaluator = COCOEvaluator(dataset_name, cfg, True, output_folder)
+        evaluators = [coco_evaluator]
+        if cfg.MODEL.DENSEPOSE_ON:
+            densepose_evaluator = DensePoseCOCOEvaluator(dataset_name, True, output_folder)
+            evaluators.append(densepose_evaluator)
+        return DatasetEvaluators(evaluators)
+
+    @classmethod
+    def build_test_loader(cls, cfg: CfgNode, dataset_name):
+        test_mapper = DatasetMapper(cfg, False)
+        return build_detection_test_loader(cfg, dataset_name, mapper=test_mapper)
+
+    @classmethod
+    def build_train_loader(cls, cfg: CfgNode):
+        train_mapper = DatasetMapper(cfg, True)
+        return build_detection_train_loader(cfg, mapper=train_mapper)
+
+    @classmethod
+    def test_with_TTA(cls, cfg: CfgNode, model):
+        """
+        Evaluate the model wrapped for test-time augmentation on all test datasets;
+        every key of the returned results carries the `_TTA` suffix
+        """
+        logger = logging.getLogger("detectron2.trainer")
+        # In the end of training, run an evaluation with TTA
+        # Only support some R-CNN models.
+        logger.info("Running inference with test-time augmentation ...")
+        transform_data = load_from_cfg(cfg)
+        model = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, DatasetMapperTTA(cfg))
+        tta_output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+        evaluators = []
+        for name in cfg.DATASETS.TEST:
+            evaluators.append(cls.build_evaluator(cfg, name, output_folder=tta_output_folder))
+        res = cls.test(cfg, model, evaluators)
+        return OrderedDict((k + "_TTA", v) for k, v in res.items())
+
+
+def setup(args):
+    """
+    Create the frozen configuration from the config file and the command line
+    overrides, run the default setup and set up the "densepose" logger
+    """
+    cfg = get_cfg()
+    add_dataset_category_config(cfg)
+    add_densepose_config(cfg)
+    cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    default_setup(cfg, args)
+    # Setup logger for "densepose" module
+    output_dir = cfg.OUTPUT_DIR
+    rank = comm.get_rank()
+    setup_logger(output=output_dir, distributed_rank=rank, name="densepose")
+    return cfg
+
+
+def main(args):
+    """
+    Train the model, or only evaluate it when `args.eval_only` is set.
+    Evaluation with test-time augmentation is added when `cfg.TEST.AUG.ENABLED` is set.
+    """
+    cfg = setup(args)
+    # disable strict kwargs checking: allow one to specify path handle
+    # hints through kwargs, like timeout in DP evaluation
+    PathManager.set_strict_kwargs_checking(False)
+
+    if not args.eval_only:
+        trainer = Trainer(cfg)
+        trainer.resume_or_load(resume=args.resume)
+        if cfg.TEST.AUG.ENABLED:
+            tta_hook = hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))
+            trainer.register_hooks([tta_hook])
+        return trainer.train()
+
+    model = Trainer.build_model(cfg)
+    checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
+    checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)
+    results = Trainer.test(cfg, model)
+    if cfg.TEST.AUG.ENABLED:
+        results.update(Trainer.test_with_TTA(cfg, model))
+    if comm.is_main_process():
+        verify_results(cfg, results)
+    return results
+
+
+# Script entry point: parse the default detectron2 command line arguments and
+# run main() through launch() with the requested GPU / machine settings
+if __name__ == "__main__":
+    args = default_argument_parser().parse_args()
+    print("Command Line Args:", args)
+    launch(
+        main,
+        args.num_gpus,
+        num_machines=args.num_machines,
+        machine_rank=args.machine_rank,
+        dist_url=args.dist_url,
+        # positional arguments forwarded to main
+        args=(args,),
+    )